[yt-svn] commit/yt: 6 new changesets
commits-noreply at bitbucket.org
commits-noreply at bitbucket.org
Thu Jul 24 04:50:28 PDT 2014
6 new commits in yt:
https://bitbucket.org/yt_analysis/yt/commits/0c505346d531/
Changeset: 0c505346d531
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-24 00:25:40
Summary: Removing merger tree functionality.
Affected #: 4 files
diff -r e97899b530819bec400dbe2127aba867d227dc0b -r 0c505346d53132d4d13791493c3413d4be69afde yt/analysis_modules/halo_merger_tree/api.py
--- a/yt/analysis_modules/halo_merger_tree/api.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-API for halo_merger_tree
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-from .merger_tree import \
- DatabaseFunctions, \
- MergerTree, \
- MergerTreeConnect, \
- Node, \
- Link, \
- MergerTreeDotOutput, \
- MergerTreeTextOutput
-
-from .enzofof_merger_tree import \
- HaloCatalog, \
- find_halo_relationships, \
- EnzoFOFMergerTree, \
- plot_halo_evolution
diff -r e97899b530819bec400dbe2127aba867d227dc0b -r 0c505346d53132d4d13791493c3413d4be69afde yt/analysis_modules/halo_merger_tree/merger_tree.py
--- a/yt/analysis_modules/halo_merger_tree/merger_tree.py
+++ /dev/null
@@ -1,1161 +0,0 @@
-"""
-MergerTree class and member functions.
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-import numpy as np
-import os, glob, time, gc, md5, sys
-import h5py
-import types
-
-from yt.funcs import *
-
-from yt.analysis_modules.halo_finding.halo_objects import \
- FOFHaloFinder, HaloFinder, parallelHF
-from yt.analysis_modules.halo_profiler.multi_halo_profiler import \
- HaloProfiler
-from yt.convenience import load
-from yt.utilities.logger import ytLogger as mylog
-import yt.extern.pydot as pydot
-from yt.utilities.spatial import cKDTree
-from yt.utilities.parallel_tools.parallel_analysis_interface import \
- ParallelDummy, \
- ParallelAnalysisInterface, \
- parallel_blocking_call
-
-try:
- import sqlite3 as sql
-except ImportError:
- mylog.error("sqlite3 not imported!")
-
-column_types = {
-"GlobalHaloID":"INTEGER",
-"SnapCurrentTimeIdentifier":"INTEGER",
-"SnapZ":"FLOAT",
-"SnapHaloID":"INTEGER",
-"HaloMass":"FLOAT",
-"NumPart":"INTEGER",
-"CenMassX":"FLOAT",
-"CenMassY":"FLOAT",
-"CenMassZ":"FLOAT",
-"BulkVelX":"FLOAT",
-"BulkVelY":"FLOAT",
-"BulkVelZ":"FLOAT",
-"MaxRad":"FLOAT",
-"ChildHaloID0":"INTEGER",
-"ChildHaloFrac0":"FLOAT",
-"ChildHaloID1":"INTEGER",
-"ChildHaloFrac1":"FLOAT",
-"ChildHaloID2":"INTEGER",
-"ChildHaloFrac2":"FLOAT",
-"ChildHaloID3":"INTEGER",
-"ChildHaloFrac3":"FLOAT",
-"ChildHaloID4":"INTEGER",
-"ChildHaloFrac4":"FLOAT"}
-
-# In order.
-columns = ["GlobalHaloID", "SnapCurrentTimeIdentifier", "SnapZ",
-"SnapHaloID", "HaloMass", "NumPart", "CenMassX", "CenMassY",
-"CenMassZ", "BulkVelX", "BulkVelY", "BulkVelZ", "MaxRad",
-"ChildHaloID0", "ChildHaloFrac0",
-"ChildHaloID1", "ChildHaloFrac1",
-"ChildHaloID2", "ChildHaloFrac2",
-"ChildHaloID3", "ChildHaloFrac3",
-"ChildHaloID4", "ChildHaloFrac4"]
-
-# Below we make the SQL command that creates the table "Halos" in the
-# database. This table is where all the data is stored.
-# Each column of data is named and its datatype is specified.
-# The GlobalHaloID is given the PRIMARY KEY property, which means that
-# the SQLite machinery assigns a consecutive and unique integer value
-# to that field automatically as each new entry is entered (that is,
-# if GlobalHaloID isn't specified already).
-create_db_line = "CREATE TABLE Halos ("
-for i, col in enumerate(columns):
- if i == 0:
- create_db_line += "%s %s PRIMARY KEY," % (col, column_types[col])
- else:
- create_db_line += " %s %s," % (col, column_types[col])
-# Strip the trailing comma and close the statement.
-create_db_line = create_db_line[:-1] + ");"
-
-NumNeighbors = 15
-NumDB = 5
-
-def minus_one():
- return -1
-
-class DatabaseFunctions(object):
- # Common database functions so it doesn't have to be repeated.
- def _open_database(self):
- # open the database. Check to make sure the database file exists.
- if not os.path.exists(self.database):
- mylog.error("The database file %s cannot be found. Exiting." % \
- self.database)
- return False
- self.conn = sql.connect(self.database)
- self.cursor = self.conn.cursor()
- return True
-
- def _close_database(self):
- # close the database cleanly.
- self.cursor.close()
- self.conn.close()
-
-class MergerTree(DatabaseFunctions, ParallelAnalysisInterface):
- r"""Build a merger tree of halos over a time-ordered set of snapshots.
- This will run a halo finder to find the halos first if it hasn't already
- been done. The output is a SQLite database file, which may need to
- be stored on a different disk than the data snapshots. See the full
- documentation for details.
-
- Parameters
- ----------
-
- restart_files : List of strings
- A list containing the paths to the forward time-ordered set of
- data snapshots.
- database : String
- Name of SQLite database file. Default = "halos.db".
- halo_finder_function : HaloFinder name
- The name of the halo finder to use if halo finding is run by
- the merger tree. Options: HaloFinder, FOFHaloFinder, parallelHF.
- Note that this is not a string, so no quotes. Default = HaloFinder.
- halo_finder_threshold : Float
- If using HaloFinder or parallelHF, the value of the density threshold
- used when halo finding. Default = 160.0.
- FOF_link_length : Float
- If using FOFHaloFinder, the linking length between particles.
- Default = 0.2.
- dm_only : Boolean
- When halo finding, whether to restrict to only dark matter particles.
- Default = False.
- refresh : Boolean
- True forces the halo finder to run even if the halo data has been
- detected on disk. Default = False.
- index : Boolean
- An index can be added to a SQLite database, which greatly
- speeds up future queries of the database,
- at the cost of doubling the disk space used by the file.
- Default = True.
-
- Examples
- --------
-
- >>> rf = ['/scratch/user/sim1/DD0000/data0000',
- ... '/scratch/user/sim1/DD0001/data0001',
- ... '/scratch/user/sim1/DD0002/data0002']
- >>> MergerTree(rf, database = '/home/user/sim1-halos.db',
- ... halo_finder_function=parallelHF)
- """
- def __init__(self, restart_files=[], database='halos.db',
- halo_finder_function=HaloFinder, halo_finder_threshold=160.0,
- FOF_link_length=0.2, dm_only=False, refresh=False,
- index=True):
- ParallelAnalysisInterface.__init__(self)
- self.restart_files = restart_files # list of enzo restart files
- self.with_halos = np.ones(len(restart_files), dtype='bool')
- self.database = database # the sqlite database of haloes.
- self.halo_finder_function = halo_finder_function # which halo finder to use
- self.halo_finder_threshold = halo_finder_threshold # overdensity threshold
- self.FOF_link_length= FOF_link_length # For FOF
- self.dm_only = dm_only
- self.refresh = refresh
- self.index = index
- self.zs = {}
- # MPI stuff
- if self.comm.rank is None:
- self.comm.rank = 0
- if self.comm.size is None:
- self.comm.size = 1
- # Get to work.
- if self.refresh and self.comm.rank == 0:
- try:
- os.unlink(self.database)
- except:
- pass
- if self.comm.rank == 0:
- self._open_create_database()
- self._create_halo_table()
- self._run_halo_finder_add_to_db()
- # Find the h5 file names for all the halos.
- for snap in self.restart_files:
- self._build_h5_refs(snap)
- # Find out how much work is already stored in the database.
- if self.comm.rank == 0:
- z_progress = self._find_progress()
- else:
- z_progress = None
- z_progress = self.comm.mpi_bcast(z_progress)
- # Loop over the pairs of snapshots to locate likely neighbors, and
- # then use those likely neighbors to compute fractional contributions.
- last = None
- self.write_values = []
- self.write_values_dict = defaultdict(dict)
- for snap, pair in enumerate(zip(self.restart_files[:-1], self.restart_files[1:])):
- if not self.with_halos[snap] or not self.with_halos[snap+1]:
- continue
- if self.zs[pair[0]] > z_progress:
- continue
- self._find_likely_children(pair[0], pair[1])
- # last is the data for the parent dataset, which can be supplied
- # as the child from the previous round for all but the first loop.
- last = self._compute_child_fraction(pair[0], pair[1], last)
- if self.comm.rank == 0:
- mylog.info("Updating database with parent-child relationships.")
- self._copy_and_update_db()
- # This has to happen because we delete the old database above.
- self._open_create_database()
- del last
- if self.comm.rank == 0:
- if self.index:
- self._write_index()
- self._close_database()
- self.comm.barrier()
- mylog.info("Done!")
-
- def _read_halo_lists(self):
- self.halo_lists = []
- for i,file in enumerate(self.halo_files):
- hp = HaloProfiler(self.restart_files[i], halo_list_file=file)
- self.halo_lists.append(hp.all_halos)
-
- def _run_halo_finder_add_to_db(self):
- for cycle, file in enumerate(self.restart_files):
- gc.collect()
- ds = load(file)
- self.zs[file] = ds.current_redshift
- self.period = ds.domain_right_edge - ds.domain_left_edge
- # If the halos are already found, skip this data step, unless
- # refresh is True.
- dir = os.path.dirname(file)
- if os.path.exists(os.path.join(dir, 'MergerHalos.out')) and \
- os.path.exists(os.path.join(dir, 'MergerHalos.txt')) and \
- glob.glob(os.path.join(dir, 'MergerHalos*h5')) is not [] and \
- not self.refresh:
- pass
- else:
- # Run the halo finder.
- if self.halo_finder_function == FOFHaloFinder:
- halos = self.halo_finder_function(ds,
- link=self.FOF_link_length, dm_only=self.dm_only)
- else:
- halos = self.halo_finder_function(ds,
- threshold=self.halo_finder_threshold, dm_only=self.dm_only)
- halos.write_out(os.path.join(dir, 'MergerHalos.out'))
- halos.write_particle_lists(os.path.join(dir, 'MergerHalos'))
- halos.write_particle_lists_txt(os.path.join(dir, 'MergerHalos'))
- if len(halos) == 0:
- mylog.info("Dataset %s has no halos." % file)
- self.with_halos[cycle] = False
- continue
- del halos
- # Now add halo data to the db if it isn't already there by
- # checking the first halo.
- continue_check = False
- if self.comm.rank == 0:
- currt = ds.unique_identifier
- line = "SELECT GlobalHaloID from Halos where SnapHaloID=0\
- and SnapCurrentTimeIdentifier=%d;" % currt
- self.cursor.execute(line)
- result = self.cursor.fetchone()
- if result != None:
- continue_check = True
- continue_check = self.comm.mpi_bcast(continue_check)
- if continue_check:
- continue
- red = ds.current_redshift
- # Read the halos off the disk using the Halo Profiler tools.
- hp = HaloProfiler(file, halo_list_file='MergerHalos.out',
- halo_list_format={'id':0, 'mass':1, 'numpart':2, 'center':[7, 8, 9], 'velocity':[10, 11, 12], 'r_max':13})
- if len(hp.all_halos) == 0:
- mylog.info("Dataset %s has no halos." % file)
- self.with_halos[cycle] = False
- del hp
- continue
- mylog.info("Entering halos into database for z=%f" % red)
- if self.comm.rank == 0:
- for ID,halo in enumerate(hp.all_halos):
- numpart = int(halo['numpart'])
- values = (None, currt, red, ID, halo['mass'], numpart,
- halo['center'][0], halo['center'][1], halo['center'][2],
- halo['velocity'][0], halo['velocity'][1], halo['velocity'][2],
- halo['r_max'] / ds['mpc'],
- -1,0.,-1,0.,-1,0.,-1,0.,-1,0.)
- # 23 question marks for 23 data columns.
- line = ''
- for i in range(23):
- line += '?,'
- # Pull off the last comma.
- line = 'INSERT into Halos VALUES (' + line[:-1] + ')'
- self.cursor.execute(line, values)
- self.conn.commit()
- self.comm.barrier()
- del hp
-
- def _open_create_database(self):
- # open the database. This creates the database file on disk if it
- # doesn't already exist. Open it on root only.
- self.conn = sql.connect(self.database)
- self.cursor = self.conn.cursor()
-
- def _create_halo_table(self):
- # Handle the error if the table already exists by doing nothing.
- try:
- self.cursor.execute(create_db_line)
- self.conn.commit()
- except sql.OperationalError:
- pass
-
- def _find_likely_children(self, parentfile, childfile):
- # For each halo in the parent list, identify likely children in the
- # list of children.
-
- # First, read in the locations of the child halos.
- child_ds = load(childfile)
- child_t = child_ds.unique_identifier
- if self.comm.rank == 0:
- line = "SELECT SnapHaloID, CenMassX, CenMassY, CenMassZ FROM \
- Halos WHERE SnapCurrentTimeIdentifier = %d" % child_t
- self.cursor.execute(line)
-
- mylog.info("Finding likely parents for z=%1.5f child halos." % \
- child_ds.current_redshift)
-
- # Build the kdtree for the children by looping over the fetched rows.
- # Normalize the points for use only within the kdtree.
- child_points = []
- for row in self.cursor:
- child_points.append([row[1] / self.period[0],
- row[2] / self.period[1],
- row[3] / self.period[2]])
- child_points = np.array(child_points)
- kdtree = cKDTree(child_points, leafsize = 10)
-
- # Find the parent points from the database.
- parent_ds = load(parentfile)
- parent_t = parent_ds.unique_identifier
- if self.comm.rank == 0:
- line = "SELECT SnapHaloID, CenMassX, CenMassY, CenMassZ FROM \
- Halos WHERE SnapCurrentTimeIdentifier = %d" % parent_t
- self.cursor.execute(line)
-
- # Loop over the returned rows, and find the likely neighbors for the
- # parents.
- candidates = {}
- for row in self.cursor:
- # Normalize positions for use within the kdtree.
- query = np.array([row[1] / self.period[0],
- row[2] / self.period[1],
- row[3] / self.period[2]])
- NNtags = kdtree.query(query, NumNeighbors, period=self.period)[1]
- nIDs = []
- for n in NNtags:
- if n not in nIDs:
- nIDs.append(n)
- # We need to fill in fake halos if there aren't enough halos,
- # which can happen at high redshifts.
- while len(nIDs) < NumNeighbors:
- nIDs.append(-1)
- candidates[row[0]] = nIDs
- del kdtree
- else:
- candidates = None
-
- # Sync across tasks.
- candidates = self.comm.mpi_bcast(candidates)
- self.candidates = candidates
-
- # This stores the masses contributed to each child candidate.
- # The +1 is an extra element in the array that collects garbage
- # values. This is allowing us to eliminate a try/except later.
- # This extra array element will be cut off eventually.
- self.child_mass_arr = np.zeros(len(candidates)*NumNeighbors + 1,
- dtype='float64')
- # Records where to put the entries in the above array.
- self.child_mass_loc = defaultdict(dict)
- # Fill it out with sub-nested default dicts that point to the
- # garbage slot, and then fill it with correct values for (possibly)
- # related parent/child halo pairs.
- for i,halo in enumerate(sorted(candidates)):
- self.child_mass_loc[halo] = defaultdict(minus_one)
- for j, child in enumerate(candidates[halo]):
- self.child_mass_loc[halo][child] = i*NumNeighbors + j
-
- def _build_h5_refs(self, filename):
- # For this snapshot, add lists of file names that contain the
- # particle info for each halo.
- if not hasattr(self, 'h5files'):
- self.h5files = defaultdict(dict)
- if not hasattr(self, 'names'):
- self.names = defaultdict(set)
- file_ds = load(filename)
- currt = file_ds.unique_identifier
- dir = os.path.dirname(filename)
- h5txt = os.path.join(dir, 'MergerHalos.txt')
- lines = file(h5txt)
- names = set([])
- for i,line in enumerate(lines):
- # Get rid of the carriage returns and turn it into a list.
- line = line.strip().split()
- self.h5files[currt][i] = line[1:]
- names.update(line[1:])
- self.names[currt].update(line[1:])
- lines.close()
-
- def _compute_child_fraction(self, parentfile, childfile, last):
- # Given a parent and child snapshot, and a list of child candidates,
- # compute what fraction of the parent halo goes to each of the children.
-
- parent_ds = load(parentfile)
- child_ds = load(childfile)
- parent_currt = parent_ds.unique_identifier
- child_currt = child_ds.unique_identifier
-
- mylog.info("Computing fractional contribututions of particles to z=%1.5f halos." % \
- child_ds.current_redshift)
-
- if last == None:
- # First we're going to read in the particles, haloIDs and masses from
- # the parent dataset.
- parent_names = list(self.names[parent_currt])
- parent_names.sort()
- parent_IDs = []
- parent_masses = []
- parent_halos = []
- for i,pname in enumerate(parent_names):
- if i>=self.comm.rank and i%self.comm.size==self.comm.rank:
- h5fp = h5py.File(pname)
- for group in h5fp:
- gID = int(group[4:])
- thisIDs = h5fp[group]['particle_index'][:]
- thisMasses = h5fp[group]['ParticleMassMsun'][:]
- parent_IDs.append(thisIDs)
- parent_masses.append(thisMasses)
- parent_halos.append(np.ones(len(thisIDs),
- dtype='int32') * gID)
- del thisIDs, thisMasses
- h5fp.close()
- # Sort the arrays by particle index in ascending order.
- if len(parent_IDs)==0:
- parent_IDs = np.array([], dtype='int64')
- parent_masses = np.array([], dtype='float64')
- parent_halos = np.array([], dtype='int32')
- else:
- parent_IDs = np.concatenate(parent_IDs).astype('int64')
- parent_masses = np.concatenate(parent_masses).astype('float64')
- parent_halos = np.concatenate(parent_halos).astype('int32')
- sort = parent_IDs.argsort()
- parent_IDs = parent_IDs[sort]
- parent_masses = parent_masses[sort]
- parent_halos = parent_halos[sort]
- del sort
- else:
- # We can use old data and save disk reading.
- (parent_IDs, parent_masses, parent_halos) = last
- # Used to communicate un-matched particles.
- parent_send = np.ones(parent_IDs.size, dtype='bool')
-
- # Now get the child halo data.
- child_names = list(self.names[child_currt])
- child_names.sort()
- child_IDs = []
- child_masses = []
- child_halos = []
- for i,cname in enumerate(child_names):
- if i>=self.comm.rank and i%self.comm.size==self.comm.rank:
- h5fp = h5py.File(cname)
- for group in h5fp:
- gID = int(group[4:])
- thisIDs = h5fp[group]['particle_index'][:]
- thisMasses = h5fp[group]['ParticleMassMsun'][:]
- child_IDs.append(thisIDs)
- child_masses.append(thisMasses)
- child_halos.append(np.ones(len(thisIDs),
- dtype='int32') * gID)
- del thisIDs, thisMasses
- h5fp.close()
- # Sort the arrays by particle index in ascending order.
- if len(child_IDs)==0:
- child_IDs = np.array([], dtype='int64')
- child_masses = np.array([], dtype='float64')
- child_halos = np.array([], dtype='int32')
- else:
- child_IDs = np.concatenate(child_IDs).astype('int64')
- child_masses = np.concatenate(child_masses)
- child_halos = np.concatenate(child_halos)
- sort = child_IDs.argsort()
- child_IDs = child_IDs[sort]
- child_masses = child_masses[sort]
- child_halos = child_halos[sort]
- del sort
-
- child_send = np.ones(child_IDs.size, dtype='bool')
-
- # Match particles in halos.
- self._match(parent_IDs, child_IDs, parent_halos, child_halos,
- parent_masses, parent_send, child_send)
-
- # Now we send all the un-matched particles to the root task for one more
- # pass. This depends on the assumption that most of the particles do
- # not move very much between data dumps, so that not too many particles
- # will be dumped on the single task.
- parent_IDs_tosend = parent_IDs[parent_send]
- parent_masses_tosend = parent_masses[parent_send]
- parent_halos_tosend = parent_halos[parent_send]
- child_IDs_tosend = child_IDs[child_send]
- child_halos_tosend = child_halos[child_send]
- del parent_send, child_send
-
- parent_IDs_tosend = self.comm.par_combine_object(parent_IDs_tosend,
- datatype="array", op="cat")
- parent_masses_tosend = self.comm.par_combine_object(parent_masses_tosend,
- datatype="array", op="cat")
- parent_halos_tosend = self.comm.par_combine_object(parent_halos_tosend,
- datatype="array", op="cat")
- child_IDs_tosend = self.comm.par_combine_object(child_IDs_tosend,
- datatype="array", op="cat")
- child_halos_tosend = self.comm.par_combine_object(child_halos_tosend,
- datatype="array", op="cat")
-
- # Resort the received particles.
- Psort = parent_IDs_tosend.argsort()
- parent_IDs_tosend = parent_IDs_tosend[Psort]
- parent_masses_tosend = parent_masses_tosend[Psort]
- parent_halos_tosend = parent_halos_tosend[Psort]
- Csort = child_IDs_tosend.argsort()
- child_IDs_tosend = child_IDs_tosend[Csort]
- child_halos_tosend = child_halos_tosend[Csort]
- del Psort, Csort
-
- # Now again, but only on the root task.
- if self.comm.rank == 0:
- self._match(parent_IDs_tosend, child_IDs_tosend,
- parent_halos_tosend, child_halos_tosend, parent_masses_tosend)
-
- # Now we sum up the contributions globally.
- self.child_mass_arr = self.comm.mpi_allreduce(self.child_mass_arr)
-
- # Trim off the garbage collection.
- self.child_mass_arr = self.child_mass_arr[:-1]
-
- if self.comm.rank == 0:
- # Turn these Msol masses into percentages of the parent.
- line = "SELECT HaloMass FROM Halos WHERE SnapCurrentTimeIdentifier=%d \
- ORDER BY SnapHaloID ASC;" % parent_currt
- self.cursor.execute(line)
- mark = 0
- result = self.cursor.fetchone()
- while result:
- mass = result[0]
- self.child_mass_arr[mark:mark+NumNeighbors] /= mass
- mark += NumNeighbors
- result = self.cursor.fetchone()
-
- # Get the global ID for the SnapHaloID=0 from the child, this will
- # be used to prevent unnecessary SQL reads.
- line = "SELECT GlobalHaloID FROM Halos WHERE SnapCurrentTimeIdentifier=%d \
- AND SnapHaloID=0;" % child_currt
- self.cursor.execute(line)
- baseChildID = self.cursor.fetchone()[0]
- else:
- baseChildID = None
-
- # Sync up data on all tasks.
- self.child_mass_arr = self.comm.mpi_bcast(self.child_mass_arr)
- baseChildID = self.comm.mpi_bcast(baseChildID)
-
- # Now we prepare a big list of writes to put in the database.
- for i,parent_halo in enumerate(sorted(self.candidates)):
- child_indexes = []
- child_per = []
- for j,child in enumerate(self.candidates[parent_halo]):
- if child == -1:
- # Account for fake children.
- child_indexes.append(-1)
- child_per.append(0.)
- continue
- # We need to get the GlobalHaloID for this child.
- child_globalID = baseChildID + child
- child_indexes.append(child_globalID)
- child_per.append(self.child_mass_arr[i*NumNeighbors + j])
- # Sort by percentages, descending.
- child_per, child_indexes = zip(*sorted(zip(child_per, child_indexes), reverse=True))
- values = []
- for pair_count, pair in enumerate(zip(child_indexes, child_per)):
- if pair_count == NumDB: break
- values.extend([int(pair[0]), float(pair[1])])
- #values.extend([parent_currt, parent_halo])
- # This has the child ID, child percent listed NumDB times, followed
- # by the currt and this parent halo ID (SnapHaloID).
- #values = tuple(values)
- self.write_values.append(values)
- self.write_values_dict[parent_currt][parent_halo] = values
-
- # Clean up.
- del parent_IDs, parent_masses, parent_halos
- del parent_IDs_tosend, parent_masses_tosend
- del parent_halos_tosend, child_IDs_tosend, child_halos_tosend
- gc.collect()
-
- return (child_IDs, child_masses, child_halos)
-
- def _match(self, parent_IDs, child_IDs, parent_halos, child_halos,
- parent_masses, parent_send = None, child_send = None):
- # Pick out IDs that are in both arrays.
- parent_in_child = np.in1d(parent_IDs, child_IDs, assume_unique = True)
- child_in_parent = np.in1d(child_IDs, parent_IDs, assume_unique = True)
- # Pare down the arrays to just matched particle IDs.
- parent_halos_cut = parent_halos[parent_in_child]
- child_halos_cut = child_halos[child_in_parent]
- parent_masses_cut = parent_masses[parent_in_child]
- # Mark the IDs that have matches so they're not sent later.
- if parent_send is not None:
- parent_send[parent_in_child] = False
- child_send[child_in_parent] = False
- # For matching pairs of particles, add the contribution of the mass.
- # Occasionally, there are matches of particle IDs where the parent
- # and child halos have not been identified as likely relations,
- # and in that case loc will be returned as -1, which is the 'garbage'
- # position in child_mass_arr. This will be trimmed off later.
- for i,pair in enumerate(zip(parent_halos_cut, child_halos_cut)):
- loc = self.child_mass_loc[pair[0]][pair[1]]
- self.child_mass_arr[loc] += parent_masses_cut[i]
- if parent_send is None:
- mylog.info("Clean-up round matched %d of %d parents and %d children." % \
- (parent_in_child.sum(), parent_IDs.size, child_IDs.size))
-
- def _copy_and_update_db(self):
- """
- Because doing an UPDATE of a SQLite database is really slow, what we'll
- do here is basically read in lines from the database, and then insert
- the parent-child relationships, writing to a new DB.
- """
- # All of this happens only on the root task!
- temp_name = self.database + '-tmp'
- to_write = []
- # Open the temporary database.
- try:
- os.remove(temp_name)
- except OSError:
- pass
- temp_conn = sql.connect(temp_name)
- temp_cursor = temp_conn.cursor()
- line = "CREATE TABLE Halos (GlobalHaloID INTEGER PRIMARY KEY,\
- SnapCurrentTimeIdentifier INTEGER, SnapZ FLOAT, SnapHaloID INTEGER, \
- HaloMass FLOAT,\
- NumPart INTEGER, CenMassX FLOAT, CenMassY FLOAT,\
- CenMassZ FLOAT, BulkVelX FLOAT, BulkVelY FLOAT, BulkVelZ FLOAT,\
- MaxRad FLOAT,\
- ChildHaloID0 INTEGER, ChildHaloFrac0 FLOAT, \
- ChildHaloID1 INTEGER, ChildHaloFrac1 FLOAT, \
- ChildHaloID2 INTEGER, ChildHaloFrac2 FLOAT, \
- ChildHaloID3 INTEGER, ChildHaloFrac3 FLOAT, \
- ChildHaloID4 INTEGER, ChildHaloFrac4 FLOAT);"
- temp_cursor.execute(line)
- temp_conn.commit()
- # Get all the data!
- self.cursor.execute("SELECT * FROM Halos;")
- results = self.cursor.fetchone()
- while results:
- results = list(results)
- currt = results[1]
- hid = results[3]
- # If for some reason this halo doesn't have relationships,
- # we'll just keep the old results the same.
- try:
- lookup = self.write_values_dict[currt][hid]
- new = tuple(results[:-10] + lookup)
- except KeyError:
- new = tuple(results)
- to_write.append(new)
- results = self.cursor.fetchone()
- # Now write to the temp database.
- # 23 question marks for 23 data columns.
- line = ''
- for i in range(23):
- line += '?,'
- # Pull off the last comma.
- line = 'INSERT into Halos VALUES (' + line[:-1] + ')'
- for insert in to_write:
- temp_cursor.execute(line, insert)
- temp_conn.commit()
- temp_cursor.close()
- temp_conn.close()
- self._close_database()
- os.rename(temp_name, self.database)
-
- def _write_index(self):
- mylog.info("Creating database index.")
- line = "CREATE INDEX IF NOT EXISTS HalosIndex ON Halos ("
- for name in columns:
- line += name +","
- line = line[:-1] + ");"
- self.cursor.execute(line)
-
- def _find_progress(self):
- # This queries the database to see how far along work has already come
- # to identify parent->child relationships.
- line = """SELECT ChildHaloID0, SnapZ from halos WHERE SnapHaloID = 0
- ORDER BY SnapZ DESC;"""
- self.cursor.execute(line)
- results = self.cursor.fetchone()
- while results:
- results = list(results)
- if results[0] == -1:
- # We've hit a dump that does not have relationships. Save this.
- return results[1] # the SnapZ.
- results = self.cursor.fetchone()
- return 0.
-
-class MergerTreeConnect(DatabaseFunctions):
- r"""Create a convenience object for accessing data from the halo database.
-
- Parameters
- ----------
- database : String
- The name of the halo database to access. Default = 'halos.db'.
-
- Examples
- --------
- >>> mtc = MergerTreeConnect('/home/user/sim1-halos.db')
- """
- def __init__(self, database='halos.db'):
- self.database = database
- result = self._open_database()
- if not result:
- return None
-
- def close(self):
- r"""Cleanly close access to the database.
-
- Examples
- --------
- >>> mtc.close()
- """
- # To be more like typical Python open/close.
- self._close_database()
-
- def query(self, string):
- r"""Performs a query of the database and returns the results as a list
- of tuples, even if the result is singular.
-
- Parameters
- ----------
-
- string : str
- The SQL query of the database.
-
- Examples
- --------
-
- >>> results = mtc.query("SELECT GlobalHaloID from Halos where SnapHaloID = 0 and \
- ... SnapZ = 0;")
- """
- # Query the database and return a list of tuples.
- if string is None:
- mylog.error("You must enter a SQL query.")
- return None
- items = []
- self.cursor.execute(string)
- results = self.cursor.fetchone()
- while results:
- items.append(results)
- results = self.cursor.fetchone()
- return items
-
- def get_GlobalHaloID(self, SnapHaloID, z):
- r"""Returns the GlobalHaloID for the given halo.
-
- Parameters
- ----------
-
- SnapHaloID : Integer
- The index label for the halo of interest, equivalent to
- the first column of the halo finder text output file.
- z : Float
- The redshift for the halo of interest. The value returned will be
- for the halo with the given SnapHaloID (above) whose redshift is
- closest to this value.
-
- Examples
- --------
-
- >>> this_halo = mtc.get_GlobalHaloID(0, 0.)
- """
- string = "SELECT GlobalHaloID,SnapZ FROM Halos WHERE SnapHaloID = %d;" \
- % SnapHaloID
- minz = 99999.
- # If -1 is returned, something went wrong.
- this_halo = -1
- self.cursor.execute(string)
- results = self.cursor.fetchone()
- while results:
- if abs(results[1] - z) < minz:
- minz = abs(results[1] - z)
- this_halo = results[0]
- results = self.cursor.fetchone()
- return this_halo
-
- def get_halo_parents(self, GlobalHaloID):
- r"""Returns a list of the parent halos to the given halo, along with
- the contribution fractions from parent to child.
-
- This function returns a list of lists, where each entry in the top list
- is [GlobalHaloID, ChildHaloFrac] of the parent halo in relationship
- to the given child halo.
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The GlobalHaloID of the halo of interest.
-
- Examples
- --------
- >>> parents = mtc.get_halo_parents(1688)
- >>> print parents
- [[1544, 0.9642857141249418],
- [1613, 0.0],
- [1614, 0.0],
- [1489, 0.0],
- [1512, 0.0],
- [1519, 0.0],
- [1609, 0.0]]
- """
- parents = []
- for i in range(NumDB):
- string = "SELECT GlobalHaloID, ChildHaloFrac%d FROM Halos\
- WHERE ChildHaloID%d=%d;" % (i, i, GlobalHaloID)
- self.cursor.execute(string)
- results = self.cursor.fetchone()
- while results:
- parents.append([results[0], results[1]])
- results = self.cursor.fetchone()
- return parents
-
- def get_direct_parent(self, GlobalHaloID):
- r"""Returns the GlobalHaloID of the direct parent of the given halo.
-
- This is accomplished by identifying the most massive parent halo
- that contributes at least 50% of its mass to the given halo.
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The GlobalHaloID of the halo of interest.
-
- Examples
- --------
- >>> parent = mtc.get_direct_parent(1688)
- >>> print parent
- 1544
- """
- parents = self.get_halo_parents(GlobalHaloID)
- mass = 0
- ID = None
- for parent in parents:
- if parent[1] < 0.5: continue
- info = self.get_halo_info(parent[0])
- if info['HaloMass'] > mass:
- mass = info['HaloMass']
- ID = parent[0]
- return ID
-
- def get_halo_info(self, GlobalHaloID):
- r"""Returns all available information for the given GlobalHaloID
- in the form of a dict.
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The unique index for the halo of interest.
-
- Examples
- --------
- >>> info = mtc.get_halo_info(1544)
- >>> print info
- {'BulkVelX': -32759799.359999999,
- 'BulkVelY': -28740239.109999999,
- 'BulkVelZ': -20066000.690000001,
- 'CenMassX': 0.23059111360000001,
- 'CenMassY': 0.4061139809,
- 'CenMassZ': 0.80882763749999997,
- 'ChildHaloFrac0': 0.9642857141249418,
- 'ChildHaloFrac1': 0.0,
- 'ChildHaloFrac2': 0.0,
- 'ChildHaloFrac3': 0.0,
- 'ChildHaloFrac4': 0.0,
- 'ChildHaloID0': 1688,
- 'ChildHaloID1': 1712,
- 'ChildHaloID2': 1664,
- 'ChildHaloID3': 1657,
- 'ChildHaloID4': 1634,
- 'GlobalHaloID': 1544,
- 'HaloMass': 20934692770000.0,
- 'MaxRad': 0.01531299899,
- 'NumPart': 196,
- 'SnapCurrentTimeIdentifier': 1275946788,
- 'SnapHaloID': 56,
- 'SnapZ': 0.024169713061444002}
- """
- string = "SELECT * FROM Halos WHERE GlobalHaloID=%d;" % GlobalHaloID
- d = {}
- self.cursor.execute(string)
- results = self.cursor.fetchone()
- for pair in zip(columns, results):
- d[pair[0]] = pair[1]
- return d
-
-class Node(object):
- def __init__(self, CoM, mass, parentIDs, z, color):
- self.CoM = CoM
- self.mass = mass
- self.parentIDs = parentIDs # In descending order of contribution
- self.z = z
- self.color = color
-
-class Link(object):
- def __init__(self):
- self.childIDs = []
- self.fractions = []
-
-class MergerTreeDotOutput(DatabaseFunctions, ParallelAnalysisInterface):
- r"""Output the merger tree history for a given set of halo(s) in Graphviz
- format.
-
- Parameters
- ----------
-
- halos : Integer or list of integers
- If current_time below is not specified or is None, this is an integer
- or list of integers with the GlobalHaloIDs of the halos to be
- tracked. If current_time is specified, this is the SnapHaloIDs
- for the halos to be tracked, which is identical to what is in
- HopAnalysis.out files (for example).
- database : String
- The name of the database file. Default = 'halos.db'.
- dotfile : String
- The name of the file to write to. Default = 'MergerTree.gv'.
- The suffix of this name gives the format of the output file,
- so 'MergerTree.jpg' would output a jpg file. "dot -v" (from the
- command line) will print
- a list of image formats supported on the system. The default
- suffix '.gv' will output the results to a text file in the Graphviz
- markup language.
- current_time : Integer
- The SnapCurrentTimeIdentifier for the snapshot for the halos to
- be tracked. This is identical to the CurrentTimeIdentifier in
- Enzo restart files. Default = None.
- link_min : Float
- When establishing a parent/child relationship, this is the minimum
- mass fraction of the parent halo contributed to
- the child halo that will be tracked
- while building the Graphviz file. Default = 0.2.
-
- Examples
- --------
-
- >>> MergerTreeDotOutput(halos=182842, database='/home/user/sim1-halos.db',
- ... dotfile = 'halo-182842.gv')
- """
- def __init__(self, halos=None, database='halos.db',
- dotfile='MergerTree.gv', current_time=None, link_min=0.2):
- ParallelAnalysisInterface.__init__(self)
- self.database = database
- self.link_min = link_min
- if halos is None:
- mylog.error("Please provide at least one halo to start the tree. Exiting.")
- return None
- result = self._open_database()
- if not result:
- mylog.warn("The database did not open correctly!")
- return None
- if type(halos) == types.IntType:
- halos = [halos]
- if current_time is not None:
- halos = self._translate_haloIDs(halos, current_time)
- newhalos = set(halos)
- # Create the pydot graph object.
- self.graph = pydot.Dot('galaxy', graph_type='digraph')
- # Build some initially empty subgraphs, which are used to identify
- # nodes that are on the same rank (redshift).
- line = "SELECT DISTINCT SnapZ FROM Halos;"
- self.cursor.execute(line)
- self.subgs = {}
- result = self.cursor.fetchone()
- while result:
- self.subgs[result[0]] = pydot.Subgraph('', rank = 'same')
- self.graph.add_subgraph(self.subgs[result[0]])
- result = self.cursor.fetchone()
- # For the first set of halos.
- self._add_nodes(newhalos)
- # Recurse over parents.
- while len(newhalos) > 0:
- mylog.info("Finding parents for %d children." % len(newhalos))
- newhalos = self._find_parents(newhalos)
- self._add_nodes(newhalos)
- self._write_dotfile(dotfile)
- return None
-
- def _translate_haloIDs(self, halos, current_time):
- # If the input is in the haloID equivalent to SnapHaloID, translate them
- # to GlobalHaloIDs.
- new_haloIDs=[]
- for halo in halos:
- line = "SELECT GlobalHaloID FROM Halos WHERE SnapHaloID=? AND \
- SnapCurrentTimeIdentifier=? limit 1;"
- values = (halo, current_time)
- self.cursor.execute(line, values)
- new_haloIDs.append(self.cursor.fetchone()[0])
- return new_haloIDs
-
- def _find_parents(self, halos):
- # Given a set of halos, find their parents and add that to each of their
- # node records. At the same time, make a link record for that
- # relationship.
- # This stores the newly discovered parent halos.
- newhalos = set([])
- for halo in halos:
- line = "SELECT GlobalHaloID, ChildHaloFrac0,\
- ChildHaloFrac1, ChildHaloFrac2,ChildHaloFrac3, ChildHaloFrac4,\
- ChildHaloID0, ChildHaloID1, ChildHaloID2, \
- ChildHaloID3, ChildHaloID4 \
- FROM Halos WHERE\
- ChildHaloID0=? or ChildHaloID1=? or ChildHaloID2=? or\
- ChildHaloID3=? or ChildHaloID4=?;"
- values = (halo, halo, halo, halo, halo)
- self.cursor.execute(line, values)
- result = self.cursor.fetchone()
- while result:
- res = list(result)
- pID = result[0]
- dsracs = res[1:6]
- cIDs = res[6:11]
- for pair in zip(cIDs, dsracs):
- if pair[1] <= self.link_min or pair[0] != halo:
- continue
- else:
- self.graph.add_edge(pydot.Edge(pID, halo,
- label = "%3.2f%%" % float(pair[1]*100),
- color = "blue",
- fontsize = "10"))
- newhalos.add(pID)
- result = self.cursor.fetchone()
- return newhalos
-
- def _add_nodes(self, newhalos):
- # Each call of this function always happens for a set of newhalos that
- # are at the same z. To give the halos color we will figure out how
- # many halos total were found this z.
- # There's probably a way to do this with only one SQL operation.
- if len(newhalos) == 0:
- return
- ahalo = list(newhalos)[0]
- line = 'SELECT SnapCurrentTimeIdentifier FROM Halos WHERE GlobalHaloID=?;'
- values = (ahalo,)
- self.cursor.execute(line, values)
- result = self.cursor.fetchone()
- # Use currt to get the number.
- line = 'SELECT max(SnapHaloID) FROM Halos where SnapCurrentTimeIdentifier=?;'
- values = (result[0],)
- self.cursor.execute(line, values)
- maxID = self.cursor.fetchone()[0]
- # For the new halos, create nodes for them.
- for halo in newhalos:
- line = 'SELECT SnapZ, HaloMass, CenMassX, CenMassY, CenMassZ,\
- SnapHaloID FROM Halos WHERE GlobalHaloID=? limit 1;'
- value = (halo,)
- self.cursor.execute(line, value)
- result = self.cursor.fetchone()
- # Add the node to the pydot graph.
- color_float = 1. - float(result[5])/(maxID+1)
- self.graph.add_node(pydot.Node(halo,
- label = "{%1.3e\\n(%1.3f,%1.3f,%1.3f)}" % \
- (result[1], result[2], result[3], result[4]),
- shape = "record",
- color = "%0.3f 1. %0.3f" % (color_float, color_float)))
- # Add this node to the correct subgraph.
- self.subgs[result[0]].add_node(pydot.Node(halo))
- # If this was the first node added to this subgraph, also add
- # the lone node for the redshift value.
- if len(self.subgs[result[0]].get_node_list()) == 1:
- self.subgs[result[0]].add_node(pydot.Node("%1.5e" % result[0],
- label = "%1.5f" % result[0],
- shape = "record", color = "green"))
-
- def _write_dotfile(self, dotfile):
- # Based on the suffix of the file name, write out the result to a file.
- suffix = dotfile.split(".")[-1]
- if suffix == "gv": suffix = "raw"
- mylog.info("Writing %s format %s to disk." % (suffix, dotfile))
- self.graph.write("%s" % dotfile, format=suffix)
-
-class MergerTreeTextOutput(DatabaseFunctions, ParallelAnalysisInterface):
- r"""Dump the contents of the merger tree database to a text file.
- This is generally not recommended.
-
- Parameters
- ----------
- database : String
- Name of the database to access. Default = 'halos.db'.
- outfile : String
- Name of the file to write to. Default = 'MergerTreeDB.txt'.
-
- Examples
- --------
- >>> MergerTreeTextOutput(database='/home/user/sim1-halos.db',
- ... outfile='halos-db.txt')
- """
- def __init__(self, database='halos.db', outfile='MergerTreeDB.txt'):
- ParallelAnalysisInterface.__init__(self)
- self.database = database
- self.outfile = outfile
- result = self._open_database()
- if not result:
- mylog.warn("Database file not read correctly!")
- return None
- self._write_out()
- self._close_database()
- return None
-
- def _write_out(self):
- # Essentially dump the contents of the database into a text file.
- fp = open(self.outfile, "w")
- # Make the header line.
- spacing = {}
- for column in columns:
- spacing[column] = (max(15,len(column)+1))
- line = "# "
- for column in columns:
- line += "%s" % column.ljust(spacing[column])
- line += "\n"
- fp.write(line)
- # Get the data.
- line = "SELECT * FROM Halos ORDER BY SnapZ DESC, SnapHaloID ASC;"
- self.cursor.execute(line)
- results = self.cursor.fetchone()
- # Write out the columns.
- while results:
- line = " "
- for i,column in enumerate(columns):
- if column_types[column] == "FLOAT":
- this = "%1.6e" % results[i]
- line += this.ljust(spacing[column])
- if column_types[column] == "INTEGER":
- this = "%d" % results[i]
- line += this.ljust(spacing[column])
- line += "\n"
- fp.write(line)
- results = self.cursor.fetchone()
- fp.close()
-
diff -r e97899b530819bec400dbe2127aba867d227dc0b -r 0c505346d53132d4d13791493c3413d4be69afde yt/analysis_modules/halo_merger_tree/setup.py
--- a/yt/analysis_modules/halo_merger_tree/setup.py
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python
-import setuptools
-import os
-import sys
-import os.path
-
-
-def configuration(parent_package='', top_path=None):
- from numpy.distutils.misc_util import Configuration
- config = Configuration('halo_merger_tree', parent_package, top_path)
- config.make_config_py() # installs __config__.py
- #config.make_svn_version_py()
- return config
https://bitbucket.org/yt_analysis/yt/commits/950c0a524631/
Changeset: 950c0a524631
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-24 01:42:01
Summary: Updating halo merger tree docs.
Affected #: 3 files
diff -r 0c505346d53132d4d13791493c3413d4be69afde -r 950c0a524631f152732f8df6bda4dbe1fa572e98 doc/source/analyzing/analysis_modules/halo_analysis.rst
--- a/doc/source/analyzing/analysis_modules/halo_analysis.rst
+++ b/doc/source/analyzing/analysis_modules/halo_analysis.rst
@@ -11,4 +11,5 @@
halo_transition
halo_finding
halo_mass_function
+ halo_merger_tree
halo_analysis_example
diff -r 0c505346d53132d4d13791493c3413d4be69afde -r 950c0a524631f152732f8df6bda4dbe1fa572e98 doc/source/analyzing/analysis_modules/halo_merger_tree.rst
--- /dev/null
+++ b/doc/source/analyzing/analysis_modules/halo_merger_tree.rst
@@ -0,0 +1,6 @@
+.. _merger_tree:
+
+Halo Merger Tree
+================
+
+.. note:: As of :code:`yt-3.0`, the halo merger tree functionality has been removed, to be replaced by machinery that works with the ``HaloCatalog`` object. In the meantime, this functionality can still be found in :code:`yt-2.x`.
diff -r 0c505346d53132d4d13791493c3413d4be69afde -r 950c0a524631f152732f8df6bda4dbe1fa572e98 doc/source/analyzing/analysis_modules/merger_tree.rst
--- a/doc/source/analyzing/analysis_modules/merger_tree.rst
+++ /dev/null
@@ -1,767 +0,0 @@
-.. _merger_tree:
-
-Halo Merger Tree
-================
-
-.. note:: At the moment the merger tree is not yet implemented using new
- halo catalog functionality.
-
-The Halo Merger Tree extension is capable of building a database of halo mergers
-over a set of time-ordered Enzo datasets. The fractional contribution of older
-'parent' halos to younger 'child' halos is calculated by comparing the unique
-index labels of their constituent particles. The data is stored in a
-`SQLite <http://sqlite.org/>`_ database which enables the use of powerful
-and fast SQL queries over all the halos.
-
-General Overview
-----------------
-
-The first requirement is a set of sequential datasets.
-The detail of the merger tree is increased as the difference in
-time between snapshots is reduced, at the cost of higher computational effort
-for the tree itself and disk usage for the snapshots.
-The merger tree relies on the output of one of the Halo Finders in yt, and the
-user can choose which one to use.
-The merger tree is capable of running the halo finder if it hasn't already
-been done.
-Once halo finding is accomplished for all the data snapshots, the halo
-lineage is calculated by comparing the particle membership of halos between
-pairs of time steps.
-The halo data and tree data are stored in the SQLite database.
-
-Clearly, another requirement is that Python has the
-`sqlite3 library <http://docs.python.org/library/sqlite3.html>`_
-installed.
-This should be built along with everything else yt needs
-if the ``install_script.sh`` was used.
-
-The merger tree can be calculated in parallel, and if necessary, it will run
-the halo finding in parallel as well. Please see the note below about the
-special considerations needed for Network File Systems.
-
-There is a convenience-wrapper for querying the database, called
-``MergerTreeConnect``.
-It simplifies accessing data in the database.
-
-There are two output classes for the merger tree. The ``MergerTreeDotOutput`` class
-outputs the tree for a user-specified subset of halos to a
-`Graphviz format <http://graphviz.org/>`_ file.
-Graphviz is an open-source package for visualizing connected objects in a
-graphical way.
-There are binary distributions for all major operating systems.
-It is also possible to dump the contents of the SQLite database to a simple text file
-with the ``MergerTreeTextOutput`` class.
-The data is saved in columnar format.
-
-Conceptual Primer
-~~~~~~~~~~~~~~~~~
-
-The best way to view the merger tree extension is as a two-part process.
-First, the merger tree is built and stored in the database.
-This process can be quite time consuming, depending on the size of the simulation,
-and the number and size of halos found in the snapshots.
-This is not a process one wants to do very often, which is why it is separate
-from the analysis parts.
-
-The second part is actually a many-part process, which is the analysis of the
-merger tree itself.
-The first step is computationally intensive, but the analysis step
-is user-intensive.
-The user needs to decide what to pull out of the merger tree
-and figure out how to extract the needed data with SQL statements.
-Once an analysis pipeline is written, it should run very fast for even
-very large databases.
-
-A Note About Network File Systems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Accessing a SQLite database stored on a Network (or Distributed) File System (NFS)
-is a risky thing to do, particularly if more than one task wants to write
-at the same time (`see more here <http://www.sqlite.org/lockingv3.html#how_to_corrupt>`_).
-NFS disks can store files on multiple physical hard drives, and it can take time
-for changes made by one task to appear to all the parallel tasks.
-Only one task of the merger tree ever interacts with the database,
-so these dangers are minimal,
-but in general it's a good idea to know something about the disk used to
-store the database.
-
-In general, it is recommended to keep the database on a 'real disk'
-(/tmp for example, if all the tasks are on the same SMP node,
-or RAM disk for extra speed) if possible,
-but it should work on a NFS disk as well.
-If a temporary disk is used to store the database while it's being built,
-remember to copy the file to a permanent disk after the merger tree script
-is finished.
-
-
-Running and Using the Halo Merger Tree
---------------------------------------
-
-It is very simple to create a merger tree database for a series of snapshots.
-The most difficult part is creating an ordered list of Enzo restart files.
-There are two ways to do it, by hand or with the EnzoSimulation extension.
-
-By Hand
-~~~~~~~
-
-Here is an example of how to build the list and build the database by hand.
-Here, the snapshots are stored in directories named DD????, and the enzo
-restart file named data????, where ???? is a four digit zero-padded integer.
-The final snapshot considered (most progressed in time) is DD0116,
-and the earliest that will be examined is DD0100.
-The database will be saved to ``/path/to/database/halos.db``.
-This example below works identically in serial or in parallel.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
- from yt.analysis_modules.halo_finding.api import *
-
- files = []
- start = 100
- finish = 116
- for i in range(start, finish + 1):
- files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
- MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-If the halos have not been found previously for the snapshots, the halo finder
-will be run automatically. See the note about this below.
-
-Using EnzoSimulation
-~~~~~~~~~~~~~~~~~~~~
-
-Here is how to build the input list of restart files using the EnzoSimulation
-extension.
-It is possible to set range and interval between snapshots.
-Please see the EnzoSimulation
-documentation (:ref:`analyzing-an-entire-simulation`) for details.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
- from yt.analysis_modules.halo_finding.api import *
- import yt.analysis_modules.simulation_handler.api as ES
-
- es = ES.EnzoSimulation('/path/to/snapshots/simulation.par')
-
- files = []
- for output in es.allOutputs:
- files.append(output['filename'])
-
- MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-Merger Tree Parallelism
------------------------
-
-If the halos are to be found during the course of building the merger tree,
-run with a number of tasks appropriate to the size of the dataset and the
-halo finder used.
-The speed of the merger tree itself,
-which compares halo membership in parallel very effectively,
-is almost completely constrained by the read/write times of the SQLite file.
-In tests with the halos pre-located, there is not much speedup beyond two MPI tasks.
-There is no negative effect with running the merger tree with more tasks (which is
-why if halos are to be found by the merger tree, the merger tree should be
-run with as many tasks as that step requires), and indeed if the simulation
-is a large one, running in parallel does provide memory parallelism,
-which is important.
-
-How The Database Is Handled In Analysis Restarts
-------------------------------------------------
-
-The Merger Tree is designed to allow the merger tree database to be built
-incrementally.
-For example, if a simulation is currently being run, the merger
-tree database can be built for the available datasets, and when new ones are
-created, the database extended to include them.
-So if there are going to be
-60 data snapshots total (indexed (0, 1, 2, ..., 59)), and only 50 are saved when the
-tree is first built, the analysis should be done on datasets [0, 49].
-If the last ten become available, re-run the merger tree on datasets [49, 59]
-referencing the same database as before.
-By referencing the same database as before, work does not need to be repeated.
-
-If the merger tree process is interrupted before completion (say, if the
-job's walltime is exceeded and the scheduler kills it), just run the exact
-same job again.
-The merger tree will check to see what work has already been completed, and
-resume where it left off.
-
-Additional Parameters
-~~~~~~~~~~~~~~~~~~~~~
-
-When calling ``MergerTree``, there are several parameters that control how the
-halo finder is run, if it needs to be run, and how the database is built.
-
- * ``halo_finder_function`` (name) - Which of the halo finders (:ref:`halo_finding`)
- to use. Default: ``HaloFinder`` (HOP).
- * ``halo_finder_threshold`` (float) - When using HOP or Parallel HOP, this sets the
- threshold used. Default: 80.0.
- * ``FOF_link_length`` (float) - When using Friends of Friends (FOFHaloFinder), this sets
- the inter-particle link length used. Default: 0.2.
- * ``dm_only`` (bool) - Whether to include stars (False), or only the dark
- matter particles when building halos (True).
- Default: False.
- * ``refresh`` (bool) - If set to True, this will run the halo finder and
- rebuild the database regardless of whether or not the halo files or
- database exist on disk already.
- Default: False.
- * ``index`` (bool) - Whether to add an index to the SQLite file. True makes
- SQL searches faster at the cost of additional disk space. Default=True.
-
-Example using Parallel HOP:
-
-.. code-block:: python
-
- MergerTree(restart_files=files, database='/path/to/database/halos.db',
- halo_finder_function=parallelHF, halo_finder_threshold=100.)
-
-Pre-Computing Halos
-~~~~~~~~~~~~~~~~~~~
-
-If halo finding is to happen before the merger tree is calculated, and the
-work is not to be wasted, special care
-should be taken to ensure that all the data required for the merger tree is
-saved.
-By default, the merger tree looks for files that begin with the name ``MergerHalos``
-in the same directory as each Enzo restart file,
-and if those files are missing or renamed, halo finding will be performed again.
-If ``halos`` is the list of halos returned by the halo finder, these three
-commands should be called to save the needed data:
-
-.. code-block:: python
-
- halos.write_out('MergerHalos.out')
- halos.write_particle_lists('MergerHalos')
- halos.write_particle_lists_txt('MergerHalos')
-
-There is a convenience function that will call the three functions above
-at one time:
-
-.. code-block:: python
-
- halos.dump('MergerHalos')
-
-Please see the documents on halo finding for more information on what these
-commands do (:ref:`halo_finding`).
-
-Accessing Data in the Database
-------------------------------
-
-SQLite databases support nearly all of the standard SQL queries.
-It is possible to write very complicated and powerful SQL queries, but below
-only simple examples are shown. Please see other resources (WWW, books) for
-more on how to write SQL queries.
-
-It is possible to read and modify a SQLite database from the command line using
-the ``sqlite3`` command (e.g. ``sqlite3 database.db``). It can be very convenient
-to use this to quickly inspect a database, but is not suitable for extracting or inserting
-large amounts of data. There are many examples (again, see the WWW or books)
-available on how to use the command line ``sqlite3`` command.
-
-The table containing halo data in the database is named 'Halos'.
-All queries for halo data will come from this table.
-The table has these columns:
-
- #. ``GlobalHaloID`` (int) - A fully-unique identifier for the halo.
- #. ``SnapCurrentTimeIdentifier`` (int) - A unique time identifier for the snapshot
- the halo comes from. Equivalent to 'CurrentTimeIdentifier' from the Enzo
- restart file.
- #. ``SnapZ`` (float) - The redshift for the halo.
- #. ``SnapHaloID`` (int) - The halo ID for the halo taken from the output of the
- halo finder (i.e. 'halos.write_out("HopAnalysis.out")'). It is unique for halos
- in the same snapshot, but not unique across the full database.
- #. ``HaloMass`` (float) - The total mass of dark matter in the halo as
- identified by the halo finder.
- #. ``NumPart`` (int) - Number of dark matter particles in the halo as identified
- by the halo finder.
- #. ``CenMassX``,
- #. ``CenMassY``,
- #. ``CenMassZ`` (float) - The location of the center of mass of the halo in code units.
- #. ``BulkVelX``,
- #. ``BulkVelY``,
- #. ``BulkVelZ`` (float) - The velocity of the center of mass of the halo in
- cgs units.
- #. ``MaxRad`` (float) - The distance from the center of mass to the most
- remote particle in the halo in code units.
- #. ``ChildHaloID0`` (int) - The GlobalHaloID of the child halo which receives
- the greatest proportion of particles from this halo.
- #. ``ChildHaloFrac0`` (float) - The fraction by mass of particles from this
- (parent) halo that goes to the child halo recorded in ChildHaloID0. If
- all the particles from this parent halo go to ChildHaloID0, this number will
- be 1.0, regardless of the mass of the child halo.
- #. ``ChildHaloID[1-4]``, ``ChildHaloFrac[1-4]`` (int, float) - Similar to the
- columns above, these store the second through fifth greatest recipients of
- particle mass from this parent halo.
-
-.. warning::
-
- A value of -1 in any of the ``ChildHaloID`` columns corresponds to
- a fake (placeholder) child halo entry. There is no halo with an ID equal to -1.
- This is used during the merger tree construction,
- and must be accounted for when constructing SQL queries of the database.
-
-To get the data for the most massive halo at the end of the simulation,
-there is a convenience class that simplifies database access. Using it, a query
-might look like this:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- line = "SELECT * FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
- results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing the values for that halo in
-the same order as
-given above for the columns.
-
-Another way to get the same information is to use one of the convenience functions.
-The following example shows how to do this:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- this_halo = mtc.get_GlobalHaloID(0, 0.0)
-
-The first argument of ``get_GlobalHaloID`` is the ``SnapHaloID`` for the halo of
-interest, and the second is the redshift of interest.
-The results are stored in ``this_halo`` as an integer.
-
-If all that is wanted is a few of the columns, this slight modification below
-will retrieve only the desired data. In general, it is a good idea to retrieve
-only the columns that will actually be used. Requesting all the columns (with
-``*``) requires more reads from disk and slows down the query.
-
-.. code-block:: python
-
- line = "SELECT NumPart, GlobalHaloID FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
- results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing two items, the values for
-``NumPart`` first and ``GlobalHaloID`` second.
-
-There is also a convenience function that will retrieve all the data columns
-for a given halo.
-The input of the function is the ``GlobalHaloID`` for the
-halo of interest, and it returns a dictionary where the keys are the names
-of the data columns, and the values are the entries in the database.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- info = mtc.get_halo_info(1544)
- print info
- {'BulkVelX': -32759799.359999999,
- 'BulkVelY': -28740239.109999999,
- 'BulkVelZ': -20066000.690000001,
- 'CenMassX': 0.23059111360000001,
- 'CenMassY': 0.4061139809,
- 'CenMassZ': 0.80882763749999997,
- 'ChildHaloFrac0': 0.9642857141249418,
- 'ChildHaloFrac1': 0.0,
- 'ChildHaloFrac2': 0.0,
- 'ChildHaloFrac3': 0.0,
- 'ChildHaloFrac4': 0.0,
- 'ChildHaloID0': 1688,
- 'ChildHaloID1': 1712,
- 'ChildHaloID2': 1664,
- 'ChildHaloID3': 1657,
- 'ChildHaloID4': 1634,
- 'GlobalHaloID': 1544,
- 'HaloMass': 20934692770000.0,
- 'MaxRad': 0.01531299899,
- 'NumPart': 196,
- 'SnapCurrentTimeIdentifier': 1275946788,
- 'SnapHaloID': 56,
- 'SnapZ': 0.024169713061444002}
-
-If data from more than one halo is desired, more than one item will be returned.
-This query will find the largest halo from each of the snapshots.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0;"
- results = mtc.query(line)
-
-``results`` is a list of multiple two-tuples.
-Note that SQLite doesn't return the values in any
-particular order. If order is unimportant, it saves time. But if order is
-important, you can modify the query to sort the results by redshift.
-
-.. code-block:: python
-
- line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0 ORDER BY SnapZ DESC;"
-
-Now ``results`` will be ordered by time, first to last, for each two-tuple
-in the list.
-
-The function ``get_halo_parents()`` will return all the halos that are
-identified as parents of the specified halo.
-Due to the way that the halo tree is constructed, it will also return parent
-halos that have zero mass contribution to the specified halo.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- parents = mtc.get_halo_parents(1688)
- print parents
- [[1544, 0.9642857141249418],
- [1613, 0.0],
- [1614, 0.0],
- [1489, 0.0],
- [1512, 0.0],
- [1519, 0.0],
- [1609, 0.0]]
-
-The last example shows the kernel of the most important operation for a
-merger tree: recursion back in time to find progenitors for a halo. Using a
-query similar to ones above, the ``GlobalHaloID`` is found for the halo of
-interest at some late point in time (z=0, typically). Using that value (given
-the random-ish value of 1234567),
-the halos that came before can be identified very easily:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- lineage = {}
- # Recursive function on parent halos.
- def findParent(haloID, lineage):
- line = "SELECT GlobalHaloID from Halos where ChildHaloID0=%d;" % haloID
- results = mtc.query(line)
- if results == []:
- return lineage
- # A one-tuple inside a list.
- parentID = results[0][0]
- lineage[parentID] = haloID
- # Now we recurse back in time.
- return findParent(parentID, lineage)
-
- # Stores the parent->child relationships.
- lineage = {}
- # Call the function once with the late halo.
- lineage = findParent(1234567, lineage)
-
-Contained within the dict ``lineage`` is the primary lineage for the final
-chosen halo. Storing the family tree in this way may not be the best choice,
-but this makes it clear how easy it is to build up the history of a halo
-over time.
-
-Merger Tree Convenience Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Below are some examples of the convenience functions available.
-
-**get_GlobalHaloID(SnapHaloID, z)**. Returns the GlobalHaloID for the
-given halo.::
-
-
- Parameters
- ---------
- SnapHaloID : Integer
- The index label for the halo of interest, equivalent to
- the first column of the halo finder text output file.
- z : Float
- The redshift for the halo of interest. The value returned will be
- for the halo with the given SnapHaloID (above) and redshift
- closest to this value.
-
- Examples
- --------
- >>> this_halo = mtc.get_GlobalHaloID(0, 0.)
-
-**get_halo_parents(GlobalHaloID)**. Returns a list of the parent halos to the
-given halo, along with the contribution fractions from parent to child.
-This function returns a list of lists, where each entry in the top list
-is [GlobalHaloID, ChildHaloFrac] of the parent halo in relationship
-to the given child halo.::
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The GlobalHaloID of the halo of interest.
-
- Examples
- --------
- >>> parents = mtc.get_halo_parents(1688)
- >>> print parents
- [[1544, 0.9642857141249418],
- [1613, 0.0],
- [1614, 0.0],
- [1489, 0.0],
- [1512, 0.0],
- [1519, 0.0],
- [1609, 0.0]]
-
-**get_direct_parent(GlobalHaloID)**. Returns the GlobalHaloID of the direct
-parent of the given halo.
-This is accomplished by identifying the most massive parent halo
-that contributes at least 50% of its mass to the given halo.::
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The GlobalHaloID of the halo of interest.
-
- Examples
- --------
- >>> parent = mtc.get_direct_parent(1688)
- >>> print parent
- 1544
-
-**get_halo_info(GlobalHaloID)**. Returns all available information for
-the given GlobalHaloID in the form of a dict.::
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The unique index for the halo of interest.
-
- Examples
- --------
- >>> info = mtc.get_halo_info(1544)
- >>> print info
- {'BulkVelX': -32759799.359999999,
- 'BulkVelY': -28740239.109999999,
- 'BulkVelZ': -20066000.690000001,
- 'CenMassX': 0.23059111360000001,
- 'CenMassY': 0.4061139809,
- 'CenMassZ': 0.80882763749999997,
- 'ChildHaloFrac0': 0.9642857141249418,
- 'ChildHaloFrac1': 0.0,
- 'ChildHaloFrac2': 0.0,
- 'ChildHaloFrac3': 0.0,
- 'ChildHaloFrac4': 0.0,
- 'ChildHaloID0': 1688,
- 'ChildHaloID1': 1712,
- 'ChildHaloID2': 1664,
- 'ChildHaloID3': 1657,
- 'ChildHaloID4': 1634,
- 'GlobalHaloID': 1544,
- 'HaloMass': 20934692770000.0,
- 'MaxRad': 0.01531299899,
- 'NumPart': 196,
- 'SnapCurrentTimeIdentifier': 1275946788,
- 'SnapHaloID': 56,
- 'SnapZ': 0.024169713061444002}
-
-
-Merger Tree Output
-------------------
-
-There are two included methods for outputting the contents of a Merger Tree
-database: Graphviz and plain-text columnar format.
-
-Graphviz Output
-~~~~~~~~~~~~~~~
-
-The `Graphviz <http://graphviz.org/>`_ output function can write the merger
-tree to a text file, which can then be parsed by the GraphViz executable
-``dot`` into an image, or an image can be created directly.
-The GraphViz engine used to parse the
-output is the ``dot`` engine, which produces hierarchical diagrams where
-directionality (such as left to right or top to bottom)
-indicates some meaningful property.
-In the case of the merger tree, top to bottom indicates the progress of
-time.
-Graphviz can output the visualization into a wide range of image and vector
-formats suitable for any application.
-
-Below is a simple example of the Graphviz/dot visualization.
-Each box contains the mass of the halo (in Msun), and the center of mass
-for the halo in simulation units.
-For each snapshot, the box for the largest halo is colored red.
-The numbers next to the link arrows give the percentage of the parent
-halo's mass that goes to the child.
-On each row, the un-linked black boxes
-contain the redshift for that snapshot.
-
-.. image:: _images/merger_tree_ex.png
- :width: 400
- :height: 438
-
-To output the merger tree for a set of halos, the chosen halos need to be
-identified. There are two choices, either the ``GlobalHaloID`` or
-the ``SnapHaloID`` along with the ``SnapCurrentTimeIdentifier`` value
-for the chosen halo(s) may be used.
-Two bits of information need to be used if ``GlobalHaloID`` is not specified
-because ``SnapHaloID`` is not a unique identifier in the database.
-The reason why ``SnapCurrentTimeIdentifier`` is used rather than ``SnapZ`` has
-to do with the float valuation of the redshift column and the way SQL queries
-work.
-If ``SnapZ`` were used, the precise float value of the desired redshift would
-have to be used, rather than the simpler-to-get-correct integer value of
-``SnapCurrentTimeIdentifier``.
-
-Luckily it isn't as hard as it sounds to get the ``GlobalHaloID`` for the
-desired halo(s).
-By using the ``MergerTreeConnect`` class, it is simple to pick out halos
-before creating the Graphviz output.
-Below, the ``GlobalHaloID`` for the most massive halo in the last (z~0, typically)
-snapshot is found:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
- results = mtc.query(line)
- print results
-
-Because of the way the database is created, from early times to late, the most
-massive halo at z~0 will have the largest ``GlobalHaloID`` for all halos with
-``SnapHaloID``=0. ``results`` will contain a one-tuple in a list of the
-desired ``GlobalHaloID``.
-
-Alternatively, one of the convenience functions can be used which may be easier:
-
-.. code-block:: python
-
- from yt.mods import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- thisHalo = mtc.get_GlobalHaloID(0, 0.0)
-
-``thisHalo`` will be an integer giving the GlobalHaloID for the most massive
-halo (ID=0) at z=0.0.
-
-To output the merger tree for the five largest halos in the last snapshot,
-it may be simplest to find the ``SnapCurrentTimeIdentifier`` for that
-snapshot.
-This can either be done by referencing the dataset itself by hand
-(look for ``CurrentTimeIdentifier`` in the Enzo restart file), or by querying
-the database.
-Here is how to query the database for the right information:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
- results = mtc.query(line)
-
- line = "SELECT SnapCurrentTimeIdentifier FROM Halos WHERE GlobalHaloID=%d;" % results[0][0]
- results = mtc.query(line)
- print results
-
-``results`` contains a one-tuple in a list of the desired
-``SnapCurrentTimeIdentifier``.
-Supposing that the desired ``SnapCurrentTimeIdentifier`` is 72084721, outputting
-merger trees is now simple:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeDotOutput(halos=[0,1,2,3,4], database='halos.db',
- dotfile='MergerTree.gv', current_time=72084721)
-
-This will output the file ``MergerTree.gv`` which can be parsed by Graphviz.
-To output to an image format, name the file appropriately (``MergerTree.png``).
-A list of available GraphViz image formats can be found by invoking
-(from the command line) ``dot -v``.
-
-If the ``GlobalHaloID`` values are known for all of the desired halos,
-``current_time`` should not be specified, as below:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
- dotfile='MergerTree.gv', link_min=0.7)
-
-The ``link_min`` parameter above limits the tree to following links between
-parent and child halos for which at least 70% of the parent halo's mass goes
-to the child. The default is 0.2.
-
-In this slightly modified example below, if ``dot`` is installed in the
-``PATH``, an image file will be created without an intermediate text file:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
- dotfile='MergerTree.png', link_min=0.7)
-
-
-Plain-Text Output
-~~~~~~~~~~~~~~~~~
-
-This is how to output the entire contents of the database to a text file:
-
-.. code-block:: python
-
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeTextOutput(database='halos.db', outfile='MergerTreeDB.txt')
-
-Putting it All Together
------------------------
-
-Here is an example of how to create a merger tree for the most massive halo
-in the final snapshot from start to finish, and output the Graphviz
-visualization as a PDF file.
-This will work in serial and in parallel.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
- from yt.analysis_modules.halo_finding.api import *
-
- # Pick our snapshots to use.
- files = []
- start = 100
- finish = 116
- for i in range(start, finish + 1):
- files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
- my_database = '/path/to/database/halos.db'
-
- # Build the tree.
- MergerTree(restart_files=files, database=my_database)
-
- # Get the GlobalHaloID for the halo.
- mtc = MergerTreeConnect(database=my_database)
- my_halo = mtc.get_GlobalHaloID(0, 0.0)
-
- # Output the tree as a PDF file.
- MergerTreeDotOutput(halos=my_halo, database=my_database, link_min=0.5,
- dotfile='MergerTree.pdf')
-
-
-
https://bitbucket.org/yt_analysis/yt/commits/d0a81e92ad2b/
Changeset: d0a81e92ad2b
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-24 01:44:26
Summary: Removing merger tree from api docs.
Affected #: 1 file
diff -r 950c0a524631f152732f8df6bda4dbe1fa572e98 -r d0a81e92ad2ba5c85c7bafa367314b9c92578a5a doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -439,13 +439,6 @@
You can use Halo catalogs generated externally as well:
-.. autosummary::
- :toctree: generated/
-
- ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.HaloCatalog
- ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.EnzoFOFMergerTree
- ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.plot_halo_evolution
-
Two Point Functions
^^^^^^^^^^^^^^^^^^^
https://bitbucket.org/yt_analysis/yt/commits/aa26e8fb20d5/
Changeset: aa26e8fb20d5
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-24 01:52:59
Summary: Removing old halo interfaces from API docs.
Affected #: 1 file
diff -r d0a81e92ad2ba5c85c7bafa367314b9c92578a5a -r aa26e8fb20d5c8ed1aee8d9e43feca6845a9ebbb doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -408,35 +408,6 @@
~yt.analysis_modules.halo_finding.halo_objects.parallelHF
~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
-You can also operate on the Halo and HaloList objects themselves:
-
-.. autosummary::
- :toctree: generated/
-
- ~yt.analysis_modules.halo_finding.halo_objects.Halo
- ~yt.analysis_modules.halo_finding.halo_objects.HaloList
- ~yt.analysis_modules.halo_finding.halo_objects.HOPHalo
- ~yt.analysis_modules.halo_finding.halo_objects.RockstarHalo
- ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHalo
- ~yt.analysis_modules.halo_finding.halo_objects.FOFHalo
- ~yt.analysis_modules.halo_finding.halo_objects.LoadedHalo
- ~yt.analysis_modules.halo_finding.halo_objects.TextHalo
- ~yt.analysis_modules.halo_finding.halo_objects.RockstarHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.HOPHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.FOFHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.LoadedHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.TextHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHaloList
-
-There are also functions for loading halos from disk:
-
-.. autosummary::
- :toctree: generated/
-
- ~yt.analysis_modules.halo_finding.halo_objects.LoadHaloes
- ~yt.analysis_modules.halo_finding.halo_objects.LoadTextHaloes
- ~yt.analysis_modules.halo_finding.halo_objects.LoadRockstarHalos
-
You can use Halo catalogs generated externally as well:
Two Point Functions
https://bitbucket.org/yt_analysis/yt/commits/3be418bfa459/
Changeset: 3be418bfa459
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-24 02:00:27
Summary: Removing one more line.
Affected #: 1 file
diff -r aa26e8fb20d5c8ed1aee8d9e43feca6845a9ebbb -r 3be418bfa459d9ded32d6ec5d3ade560c062db5c doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -408,8 +408,6 @@
~yt.analysis_modules.halo_finding.halo_objects.parallelHF
~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
-You can use Halo catalogs generated externally as well:
-
Two Point Functions
^^^^^^^^^^^^^^^^^^^
https://bitbucket.org/yt_analysis/yt/commits/0a9b5f49e715/
Changeset: 0a9b5f49e715
Branch: yt-3.0
User: MatthewTurk
Date: 2014-07-24 13:50:20
Summary: Merged in brittonsmith/yt/yt-3.0 (pull request #1066)
Removing most of the merger tree machinery.
Affected #: 8 files
diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/analyzing/analysis_modules/halo_analysis.rst
--- a/doc/source/analyzing/analysis_modules/halo_analysis.rst
+++ b/doc/source/analyzing/analysis_modules/halo_analysis.rst
@@ -11,4 +11,5 @@
halo_transition
halo_finding
halo_mass_function
+ halo_merger_tree
halo_analysis_example
diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/analyzing/analysis_modules/halo_merger_tree.rst
--- /dev/null
+++ b/doc/source/analyzing/analysis_modules/halo_merger_tree.rst
@@ -0,0 +1,6 @@
+.. _merger_tree:
+
+Halo Merger Tree
+================
+
+.. note:: As of :code:`yt-3.0`, the halo merger tree functionality has been removed and will be replaced by machinery that works with the ``HaloCatalog`` object. In the meantime, this functionality can still be found in :code:`yt-2.x`.
diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/analyzing/analysis_modules/merger_tree.rst
--- a/doc/source/analyzing/analysis_modules/merger_tree.rst
+++ /dev/null
@@ -1,767 +0,0 @@
-.. _merger_tree:
-
-Halo Merger Tree
-================
-
-.. note:: At the moment the merger tree is not yet implemented using new
- halo catalog functionality.
-
-The Halo Merger Tree extension is capable of building a database of halo mergers
-over a set of time-ordered Enzo datasets. The fractional contribution of older
-'parent' halos to younger 'child' halos is calculated by comparing the unique
-index labels of their constituent particles. The data is stored in a
-`SQLite <http://sqlite.org/>`_ database which enables the use of powerful
-and fast SQL queries over all the halos.
-
-General Overview
-----------------
-
-The first requirement is a set of sequential datasets.
-The detail of the merger tree is increased as the difference in
-time between snapshots is reduced, at the cost of higher computational effort
-for the tree itself and disk usage for the snapshots.
-The merger tree relies on the output of one of the Halo Finders in yt, and the
-user can choose which one to use.
-The merger tree is capable of running the halo finder if it hasn't already
-been done.
-Once halo finding is accomplished for all the data snapshots, the halo
-lineage is calculated by comparing the particle membership of halos between
-pairs of time steps.
-The halo data and tree data are stored in the SQLite database.
-
-Clearly, another requirement is that Python has the
-`sqlite3 library <http://docs.python.org/library/sqlite3.html>`_
-installed.
-This should be built along with everything else yt needs
-if the ``install_script.sh`` was used.
-
-The merger tree can be calculated in parallel, and if necessary, it will run
-the halo finding in parallel as well. Please see the note below about the
-special considerations needed for Network File Systems.
-
-There is a convenience-wrapper for querying the database, called
-``MergerTreeConnect``.
-It simplifies accessing data in the database.
-
-There are two output classes for the merger tree. The ``MergerTreeDotOutput`` class
-outputs the tree for a user-specified subset of halos to a
-`Graphviz format <http://graphviz.org/>`_ file.
-Graphviz is an open-source package for visualizing connected objects in a
-graphical way.
-There are binary distributions for all major operating systems.
-It is also possible to dump the contents of the SQLite database to a simple text file
-with the ``MergerTreeTextOutput`` class.
-The data is saved in columnar format.
-
-Conceptual Primer
-~~~~~~~~~~~~~~~~~
-
-The best way to view the merger tree extension is as a two-part process.
-First, the merger tree is built and stored in the database.
-This process can be quite time consuming, depending on the size of the simulation,
-and the number and size of halos found in the snapshots.
-This is not a process one wants to do very often, which is why it is separate
-from the analysis parts.
-
-The second part is actually a many-part process, which is the analysis of the
-merger tree itself.
-The first step is computationally intensive, but the analysis step
-is user-intensive.
-The user needs to decide what to pull out of the merger tree
-and figure out how to extract the needed data with SQL statements.
-Once an analysis pipeline is written, it should run very fast for even
-very large databases.
-
-A Note About Network File Systems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Accessing a SQLite database stored on a Network (or Distributed) File System (NFS)
-is a risky thing to do, particularly if more than one task wants to write
-at the same time (`see more here <http://www.sqlite.org/lockingv3.html#how_to_corrupt>`_).
-NFS disks can store files on multiple physical hard drives, and it can take time
-for changes made by one task to appear to all the parallel tasks.
-Only one task of the merger tree ever interacts with the database,
-so these dangers are minimal,
-but in general it's a good idea to know something about the disk used to
-store the database.
-
-In general, it is recommended to keep the database on a 'real disk'
-(/tmp for example, if all the tasks are on the same SMP node,
-or RAM disk for extra speed) if possible,
-but it should work on a NFS disk as well.
-If a temporary disk is used to store the database while it's being built,
-remember to copy the file to a permanent disk after the merger tree script
-is finished.
-
-
-Running and Using the Halo Merger Tree
---------------------------------------
-
-It is very simple to create a merger tree database for a series of snapshots.
-The most difficult part is creating an ordered list of Enzo restart files.
-There are two ways to do it, by hand or with the EnzoSimulation extension.
-
-By Hand
-~~~~~~~
-
-Here is an example of how to build the list and build the database by hand.
-Here, the snapshots are stored in directories named DD????, and the enzo
-restart file named data????, where ???? is a four digit zero-padded integer.
-The final snapshot considered (most progressed in time) is DD0116,
-and the earliest that will be examined is DD0100.
-The database will be saved to ``/path/to/database/halos.db``.
-This example below works identically in serial or in parallel.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
- from yt.analysis_modules.halo_finding.api import *
-
- files = []
- start = 100
- finish = 116
- for i in range(start, finish + 1):
- files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
- MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-If the halos have not been found previously for the snapshots, the halo finder
-will be run automatically. See the note about this below.
-
-Using EnzoSimulation
-~~~~~~~~~~~~~~~~~~~~
-
-Here is how to build the input list of restart files using the EnzoSimulation
-extension.
-It is possible to set range and interval between snapshots.
-Please see the EnzoSimulation
-documentation (:ref:`analyzing-an-entire-simulation`) for details.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
- from yt.analysis_modules.halo_finding.api import *
- import yt.analysis_modules.simulation_handler.api as ES
-
- es = ES.EnzoSimulation('/path/to/snapshots/simulation.par')
-
- files = []
- for output in es.allOutputs:
- files.append(output['filename'])
-
- MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-Merger Tree Parallelism
------------------------
-
-If the halos are to be found during the course of building the merger tree,
-run with a number of tasks appropriate to the size of the dataset and the
-halo finder used.
-The speed of the merger tree itself,
-which compares halo membership in parallel very effectively,
-is almost completely constrained by the read/write times of the SQLite file.
-In tests with the halos pre-located, there is not much speedup beyond two MPI tasks.
-There is no negative effect with running the merger tree with more tasks (which is
-why if halos are to be found by the merger tree, the merger tree should be
-run with as many tasks as that step requires), and indeed if the simulation
-is a large one, running in parallel does provide memory parallelism,
-which is important.
-
-How The Database Is Handled In Analysis Restarts
-------------------------------------------------
-
-The Merger Tree is designed to allow the merger tree database to be built
-incrementally.
-For example, if a simulation is currently being run, the merger
-tree database can be built for the available datasets, and when new ones are
-created, the database extended to include them.
-So if there are going to be
-60 data snapshots total (indexed (0, 1, 2, ..., 59)), and only 50 are saved when the
-tree is first built, the analysis should be done on datasets [0, 49].
-If the last ten become available, re-run the merger tree on datasets [49, 59]
-referencing the same database as before.
-By referencing the same database as before, work does not need to be repeated.
-
-If the merger tree process is interrupted before completion (say, if the
-job's walltime is exceeded and the scheduler kills it), just run the exact
-same job again.
-The merger tree will check to see what work has already been completed, and
-resume where it left off.
-
-Additional Parameters
-~~~~~~~~~~~~~~~~~~~~~
-
-When calling ``MergerTree``, the following optional parameters control how the
-halo finder is run, if it needs to be run, and how the database is built.
-
- * ``halo_finder_function`` (name) - Which of the halo finders (:ref:`halo_finding`)
- to use. Default: ``HaloFinder`` (HOP).
- * ``halo_finder_threshold`` (float) - When using HOP or Parallel HOP, this sets the
- threshold used. Default: 80.0.
- * ``FOF_link_length`` (float) - When using Friends of Friends (FOFHaloFinder), this sets
- the inter-particle link length used. Default: 0.2.
- * ``dm_only`` (bool) - Whether to include stars (False), or only the dark
- matter particles when building halos (True).
- Default: False.
- * ``refresh`` (bool) - If set to True, this will run the halo finder and
- rebuild the database regardless of whether or not the halo files or
- database exist on disk already.
- Default: False.
- * ``index`` (bool) - Whether to add an index to the SQLite file. True makes
- SQL searches faster at the cost of additional disk space. Default=True.
-
-Example using Parallel HOP:
-
-.. code-block:: python
-
- MergerTree(restart_files=files, database='/path/to/database/halos.db',
- halo_finder_function=parallelHF, halo_finder_threshold=100.)
-
-Pre-Computing Halos
-~~~~~~~~~~~~~~~~~~~
-
-If halo finding is to happen before the merger tree is calculated, and the
-work is not to be wasted, special care
-should be taken to ensure that all the data required for the merger tree is
-saved.
-By default, the merger tree looks for files that begin with the name ``MergerHalos``
-in the same directory as each Enzo restart file,
-and if those files are missing or renamed, halo finding will be performed again.
-If ``halos`` is the list of halos returned by the halo finder, these three
-commands should be called to save the needed data:
-
-.. code-block:: python
-
- halos.write_out('MergerHalos.out')
- halos.write_particle_lists('MergerHalos')
- halos.write_particle_lists_txt('MergerHalos')
-
-There is a convenience function that will call the three functions above
-at one time:
-
-.. code-block:: python
-
- halos.dump('MergerHalos')
-
-Please see the documents on halo finding for more information on what these
-commands do (:ref:`halo_finding`).
-
-Accessing Data in the Database
-------------------------------
-
-SQLite databases support nearly all of the standard SQL queries.
-It is possible to write very complicated and powerful SQL queries, but below
-only simple examples are shown. Please see other resources (WWW, books) for
-more on how to write SQL queries.
-
-It is possible to read and modify a SQLite database from the command line using
-the ``sqlite3`` command (e.g. ``sqlite3 database.db``). It can be very convenient
-to use this to quickly inspect a database, but is not suitable for extracting or inserting
-large amounts of data. There are many examples (again, see the WWW or books)
-available on how to use the command line ``sqlite3`` command.
-
-The table containing halo data in the database is named 'Halos'.
-All queries for halo data will come from this table.
-The table has these columns:
-
- #. ``GlobalHaloID`` (int) - A fully-unique identifier for the halo.
- #. ``SnapCurrentTimeIdentifier`` (int) - A unique time identifier for the snapshot
- the halo comes from. Equivalent to 'CurrentTimeIdentifier' from the Enzo
- restart file.
- #. ``SnapZ`` (float) - The redshift for the halo.
- #. ``SnapHaloID`` (int) - The halo ID for the halo taken from the output of the
- halo finder (i.e. 'halos.write_out("HopAnalysis.out")'). It is unique for halos
- in the same snapshot, but not unique across the full database.
- #. ``HaloMass`` (float) - The total mass of dark matter in the halo as
- identified by the halo finder.
- #. ``NumPart`` (int) - Number of dark matter particles in the halo as identified
- by the halo finder.
- #. ``CenMassX``,
- #. ``CenMassY``,
- #. ``CenMassZ`` (float) - The location of the center of mass of the halo in code units.
- #. ``BulkVelX``,
- #. ``BulkVelY``,
- #. ``BulkVelZ`` (float) - The velocity of the center of mass of the halo in
- cgs units.
- #. ``MaxRad`` (float) - The distance from the center of mass to the most
- remote particle in the halo in code units.
- #. ``ChildHaloID0`` (int) - The GlobalHaloID of the child halo which receives
- the greatest proportion of particles from this halo.
- #. ``ChildHaloFrac0`` (float) - The fraction by mass of particles from this
- (parent) halo that goes to the child halo recorded in ChildHaloID0. If
- all the particles from this parent halo go to ChildHaloID0, this number will
- be 1.0, regardless of the mass of the child halo.
- #. ``ChildHaloID[1-4]``, ``ChildHaloFrac[1-4]`` (int, float) - Similar to the
- columns above, these store the second through fifth greatest recipients of
- particle mass from this parent halo.
-
-.. warning::
-
- A value of -1 in any of the ``ChildHaloID`` columns corresponds to
- a fake (placeholder) child halo entry. There is no halo with an ID equal to -1.
- This is used during the merger tree construction,
- and must be accounted for when constructing SQL queries of the database.
-
-To get the data for the most massive halo at the end of the simulation,
-there is a convenience class that simplifies database access. Using it, a query
-might look like this:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- line = "SELECT * FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
- results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing the values for that halo in
-the same order as
-given above for the columns.
-
-Another way to get the same information is to use one of the convenience functions.
-The following example shows how to do this:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- this_halo = mtc.get_GlobalHaloID(0, 0.0)
-
-The first term of ``get_GlobalHaloID`` is the ``SnapHaloID`` for the halo of
-interest, and the second is the redshift of interest.
-The results are stored in ``this_halo`` as an integer.
-
-If all that is wanted is a few of the columns, this slight modification below
-will retrieve only the desired data. In general, it is a good idea to retrieve
-only the columns that will actually be used. Requesting all the columns (with
-``*``) requires more reads from disk and slows down the query.
-
-.. code-block:: python
-
- line = "SELECT NumPart, GlobalHaloID FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
- results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing two items, the values for
-``NumPart`` first and ``GlobalHaloID`` second.
-
-There is also a convenience function that will retrieve all the data columns
-for a given halo.
-The input of the function is the ``GlobalHaloID`` for the
-halo of interest, and it returns a dictionary where the keys are the names
-of the data columns, and the values are the entries in the database.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- info = mtc.get_halo_info(1544)
- print info
- {'BulkVelX': -32759799.359999999,
- 'BulkVelY': -28740239.109999999,
- 'BulkVelZ': -20066000.690000001,
- 'CenMassX': 0.23059111360000001,
- 'CenMassY': 0.4061139809,
- 'CenMassZ': 0.80882763749999997,
- 'ChildHaloFrac0': 0.9642857141249418,
- 'ChildHaloFrac1': 0.0,
- 'ChildHaloFrac2': 0.0,
- 'ChildHaloFrac3': 0.0,
- 'ChildHaloFrac4': 0.0,
- 'ChildHaloID0': 1688,
- 'ChildHaloID1': 1712,
- 'ChildHaloID2': 1664,
- 'ChildHaloID3': 1657,
- 'ChildHaloID4': 1634,
- 'GlobalHaloID': 1544,
- 'HaloMass': 20934692770000.0,
- 'MaxRad': 0.01531299899,
- 'NumPart': 196,
- 'SnapCurrentTimeIdentifier': 1275946788,
- 'SnapHaloID': 56,
- 'SnapZ': 0.024169713061444002}
-
-If data from more than one halo is desired, more than one item will be returned.
-This query will find the largest halo from each of the snapshots.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0;"
- results = mtc.query(line)
-
-``results`` is a list of multiple two-tuples.
-Note that SQLite doesn't return the values in any
-particular order. If order is unimportant, it saves time. But if order is
-important, you can modify the query to sort the results by redshift.
-
-.. code-block:: python
-
- line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0 ORDER BY SnapZ DESC;"
-
-Now ``results`` will be ordered by time, first to last, for each two-tuple
-in the list.
-
-The function ``get_halo_parents()`` will return all the halos that are
-identified as parents of the specified halo.
-Due to the way that the halo tree is constructed, it will also return parent
-halos that have zero mass contribution to the specified halo.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
- parents = mtc.get_halo_parents(1688)
- print parents
- [[1544, 0.9642857141249418],
- [1613, 0.0],
- [1614, 0.0],
- [1489, 0.0],
- [1512, 0.0],
- [1519, 0.0],
- [1609, 0.0]]
-
-The last example shows the kernel of the most important operation for a
-merger tree: recursion back in time to find progenitors for a halo. Using a
-query similar to ones above, the ``GlobalHaloID`` is found for the halo of
-interest at some late point in time (z=0, typically). Using that value (given
-the random-ish value of 1234567),
-the halos that came before can be identified very easily:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- lineage = {}
- # Recursive function on parent halos.
- def findParent(haloID, lineage):
- line = "SELECT GlobalHaloID from Halos where ChildHaloID0=%d;" % haloID
- results = mtc.query(line)
- if results == []:
- return lineage
- # A one-tuple inside a list.
- parentID = results[0][0]
- lineage[parentID] = haloID
- # Now we recurse back in time.
- lineage = findParent(parentID, lineage)
-
- # Stores the parent->child relationships.
- lineage = {}
- # Call the function once with the late halo.
- lineage = findParent(1234567, lineage)
-
-Contained within the dict ``lineage`` is the primary lineage for the final
-chosen halo. Storing the family tree in this way may not be the best choice,
-but this makes it clear how easy it is to build up the history of a halo
-over time.
-
-Merger Tree Convenience Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Below are some examples of the convenience functions available.
-
-**get_GlobalHaloID(SnapHaloID, z)**. Returns the GlobalHaloID for the
-given halo.::
-
-
- Parameters
- ----------
- SnapHaloID : Integer
- The index label for the halo of interest, equivalent to
- the first column of the halo finder text output file.
- z : Float
- The redshift for the halo of interest. The value returned will be
- for the halo with SnapHaloID equal to ID (above) with redshift
- closest to this value.
-
- Examples
- --------
- >>> this_halo = mtc.get_GlobalHaloID(0, 0.)
-
-**get_halo_parents(GlobalHaloID)**. Returns a list of the parent halos to the
-given halo, along with the contribution fractions from parent to child.
-This function returns a list of lists, where each entry in the top list
-is [GlobalHaloID, ChildHaloFrac] of the parent halo in relationship
-to the given child halo.::
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The GlobalHaloID of the halo of interest.
-
- Examples
- --------
- >>> parents = mtc.get_halo_parents(1688)
- >>> print parents
- [[1544, 0.9642857141249418],
- [1613, 0.0],
- [1614, 0.0],
- [1489, 0.0],
- [1512, 0.0],
- [1519, 0.0],
- [1609, 0.0]]
-
-**get_direct_parent(GlobalHaloID)**. Returns the GlobalHaloID of the direct
-parent of the given halo.
-This is accomplished by identifying the most massive parent halo
-that contributes at least 50% of its mass to the given halo.::
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The GlobalHaloID of the halo of interest.
-
- Examples
- --------
- >>> parent = mtc.get_direct_parent(1688)
- >>> print parent
- 1544
-
-**get_halo_info(GlobalHaloID)**. Returns all available information for
-the given GlobalHaloID in the form of a dict.::
-
- Parameters
- ----------
- GlobalHaloID : Integer
- The unique index for the halo of interest.
-
- Examples
- --------
- >>> info = mtc.get_halo_info(1544)
- >>> print info
- {'BulkVelX': -32759799.359999999,
- 'BulkVelY': -28740239.109999999,
- 'BulkVelZ': -20066000.690000001,
- 'CenMassX': 0.23059111360000001,
- 'CenMassY': 0.4061139809,
- 'CenMassZ': 0.80882763749999997,
- 'ChildHaloFrac0': 0.9642857141249418,
- 'ChildHaloFrac1': 0.0,
- 'ChildHaloFrac2': 0.0,
- 'ChildHaloFrac3': 0.0,
- 'ChildHaloFrac4': 0.0,
- 'ChildHaloID0': 1688,
- 'ChildHaloID1': 1712,
- 'ChildHaloID2': 1664,
- 'ChildHaloID3': 1657,
- 'ChildHaloID4': 1634,
- 'GlobalHaloID': 1544,
- 'HaloMass': 20934692770000.0,
- 'MaxRad': 0.01531299899,
- 'NumPart': 196,
- 'SnapCurrentTimeIdentifier': 1275946788,
- 'SnapHaloID': 56,
- 'SnapZ': 0.024169713061444002}
-
-
-Merger Tree Output
-------------------
-
-There are two included methods for outputting the contents of a Merger Tree
-database: Graphviz and plain-text columnar format.
-
-Graphviz Output
-~~~~~~~~~~~~~~~
-
-The `Graphviz <http://graphviz.org/>`_ output function can write the merger
-tree to a text file, which can then be parsed by the GraphViz executable
-``dot`` into an image, or an image can be created directly.
-The GraphViz engine used to parse the
-output is the ``dot`` engine, which produces hierarchical diagrams where
-directionality (such as left to right or top to bottom)
-indicates some meaningful property.
-In the case of the merger tree, top to bottom indicates the progress of
-time.
-Graphviz can output the visualization into a wide range of image and vector
-formats suitable for any application.
-
-Below is a simple example of the Graphviz/dot visualization.
-Each box contains the mass of the halo (in Msun), and the center of mass
-for the halo in simulation units.
-For each snapshot, the box for the largest halo is colored red.
-The numbers next to the link arrows give the percentage of the parent
-halo's mass that goes to the child.
-On each row, the un-linked black boxes
-contain the redshift for that snapshot.
-
-.. image:: _images/merger_tree_ex.png
- :width: 400
- :height: 438
-
-To output the merger tree for a set of halos, the chosen halos need to be
-identified. There are two choices, either the ``GlobalHaloID`` or
-the ``SnapHaloID`` along with the ``SnapCurrentTimeIdentifier`` value
-for the chosen halo(s) may be used.
-Two bits of information need to be used if ``GlobalHaloID`` is not specified
-because ``SnapHaloID`` is not a unique identifier in the database.
-The reason why ``SnapCurrentTimeIdentifier`` is used rather than ``SnapZ`` has
-to do with the float valuation of the redshift column and the way SQL queries
-work.
-If ``SnapZ`` were used, the precise float value of the desired redshift would
-have to be used, rather than the simpler-to-get-correct integer value of
-``SnapCurrentTimeIdentifier``.
-
-Luckily it isn't as hard as it sounds to get the ``GlobalHaloID`` for the
-desired halo(s).
-By using the ``MergerTreeConnect`` class, it is simple to pick out halos
-before creating the Graphviz output.
-Below, the ``GlobalHaloID`` for the most massive halo in the last (z~0, typically)
-snapshot is found:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
- results = mtc.query(line)
- print results
-
-Because of the way the database is created, from early times to late, the most
-massive halo at z~0 will have the largest ``GlobalHaloID`` for all halos with
-``SnapHaloID=0``. ``results`` will contain a one-tuple in a list of the
-desired ``GlobalHaloID``.
-
-Alternatively, one of the convenience functions can be used which may be easier:
-
-.. code-block:: python
-
- from yt.mods import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- thisHalo = mtc.get_GlobalHaloID(0, 0.0)
-
-``thisHalo`` will be an integer giving the GlobalHaloID for the most massive
-halo (ID=0) at z=0.0.
-
-To output the merger tree for the five largest halos in the last snapshot,
-it may be simplest to find the ``SnapCurrentTimeIdentifier`` for that
-snapshot.
-This can either be done by referencing the dataset itself by hand
-(look for ``CurrentTimeIdentifier`` in the Enzo restart file), or by querying
-the database.
-Here is how to query the database for the right information:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- mtc = MergerTreeConnect(database='halos.db')
-
- line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
- results = mtc.query(line)
-
- line = "SELECT SnapCurrentTimeIdentifier FROM Halos WHERE GlobalHaloID=%d;" % results[0][0]
- results = mtc.query(line)
- print results
-
-``results`` contains a one-tuple in a list of the desired
-``SnapCurrentTimeIdentifier``.
-Supposing that the desired ``SnapCurrentTimeIdentifier`` is 72084721, outputting
-merger trees is now simple:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeDotOutput(halos=[0,1,2,3,4], database='halos.db',
- dotfile='MergerTree.gv', current_time=72084721)
-
-This will output the file ``MergerTree.gv`` which can be parsed by Graphviz.
-To output to an image format, name the file appropriately (``MergerTree.png``).
-A list of available GraphViz image formats can be found by invoking
-(from the command line) ``dot -v``.
-
-If the ``GlobalHaloID`` values are known for all of the desired halos,
-``current_time`` should not be specified, as below:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
- dotfile='MergerTree.gv', link_min=0.7)
-
-The ``link_min`` parameter above limits the tree to following links between
-parent and child halos for which at least 70% of the parent halo's mass goes
-to the child. The default is 0.2.
-
-In this slightly modified example below, if ``dot`` is installed in the
-``PATH``, an image file will be created without an intermediate text file:
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
- dotfile='MergerTree.png', link_min=0.7)
-
-
-Plain-Text Output
-~~~~~~~~~~~~~~~~~
-
-This is how to output the entire contents of the database to a text file:
-
-.. code-block:: python
-
- from yt.analysis_modules.halo_merger_tree.api import *
-
- MergerTreeTextOutput(database='halos.db', outfile='MergerTreeDB.txt')
-
-Putting it All Together
------------------------
-
-Here is an example of how to create a merger tree for the most massive halo
-in the final snapshot from start to finish, and output the Graphviz
-visualization as a PDF file.
-This will work in serial and in parallel.
-
-.. code-block:: python
-
- from yt.mods import *
- from yt.analysis_modules.halo_merger_tree.api import *
- from yt.analysis_modules.halo_finding.api import *
-
- # Pick our snapshots to use.
- files = []
- start = 100
- finish = 116
- for i in range(start, finish + 1):
- files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
- my_database = '/path/to/database/halos.db'
-
- # Build the tree.
- MergerTree(restart_files=files, database=my_database)
-
- # Get the GlobalHaloID for the halo.
- mtc = MergerTreeConnect(database=my_database)
- my_halo = mtc.get_GlobalHaloID(0, 0.0)
-
- # Output the tree as a PDF file.
- MergerTreeDotOutput(halos=my_halo, database=my_database, link_min=0.5,
- dotfile='MergerTree.pdf')
-
-
-
diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -408,44 +408,6 @@
~yt.analysis_modules.halo_finding.halo_objects.parallelHF
~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
-You can also operate on the Halo and HaloList objects themselves:
-
-.. autosummary::
- :toctree: generated/
-
- ~yt.analysis_modules.halo_finding.halo_objects.Halo
- ~yt.analysis_modules.halo_finding.halo_objects.HaloList
- ~yt.analysis_modules.halo_finding.halo_objects.HOPHalo
- ~yt.analysis_modules.halo_finding.halo_objects.RockstarHalo
- ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHalo
- ~yt.analysis_modules.halo_finding.halo_objects.FOFHalo
- ~yt.analysis_modules.halo_finding.halo_objects.LoadedHalo
- ~yt.analysis_modules.halo_finding.halo_objects.TextHalo
- ~yt.analysis_modules.halo_finding.halo_objects.RockstarHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.HOPHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.FOFHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.LoadedHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.TextHaloList
- ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHaloList
-
-There are also functions for loading halos from disk:
-
-.. autosummary::
- :toctree: generated/
-
- ~yt.analysis_modules.halo_finding.halo_objects.LoadHaloes
- ~yt.analysis_modules.halo_finding.halo_objects.LoadTextHaloes
- ~yt.analysis_modules.halo_finding.halo_objects.LoadRockstarHalos
-
-You can use Halo catalogs generated externally as well:
-
-.. autosummary::
- :toctree: generated/
-
- ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.HaloCatalog
- ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.EnzoFOFMergerTree
- ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.plot_halo_evolution
-
Two Point Functions
^^^^^^^^^^^^^^^^^^^
diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded yt/analysis_modules/halo_merger_tree/api.py
--- a/yt/analysis_modules/halo_merger_tree/api.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-API for halo_merger_tree
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-from .merger_tree import \
- DatabaseFunctions, \
- MergerTree, \
- MergerTreeConnect, \
- Node, \
- Link, \
- MergerTreeDotOutput, \
- MergerTreeTextOutput
-
-from .enzofof_merger_tree import \
- HaloCatalog, \
- find_halo_relationships, \
- EnzoFOFMergerTree, \
- plot_halo_evolution
This diff is so big that we needed to truncate the remainder.
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the yt-svn
mailing list