[yt-svn] commit/yt: 6 new changesets

commits-noreply at bitbucket.org commits-noreply at bitbucket.org
Thu Jul 24 04:50:28 PDT 2014


6 new commits in yt:

https://bitbucket.org/yt_analysis/yt/commits/0c505346d531/
Changeset:   0c505346d531
Branch:      yt-3.0
User:        brittonsmith
Date:        2014-07-24 00:25:40
Summary:     Removing merger tree functionality.
Affected #:  4 files

diff -r e97899b530819bec400dbe2127aba867d227dc0b -r 0c505346d53132d4d13791493c3413d4be69afde yt/analysis_modules/halo_merger_tree/api.py
--- a/yt/analysis_modules/halo_merger_tree/api.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-API for halo_merger_tree
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-from .merger_tree import \
-    DatabaseFunctions, \
-    MergerTree, \
-    MergerTreeConnect, \
-    Node, \
-    Link, \
-    MergerTreeDotOutput, \
-    MergerTreeTextOutput
-
-from .enzofof_merger_tree import \
-    HaloCatalog, \
-    find_halo_relationships, \
-    EnzoFOFMergerTree, \
-    plot_halo_evolution

diff -r e97899b530819bec400dbe2127aba867d227dc0b -r 0c505346d53132d4d13791493c3413d4be69afde yt/analysis_modules/halo_merger_tree/merger_tree.py
--- a/yt/analysis_modules/halo_merger_tree/merger_tree.py
+++ /dev/null
@@ -1,1161 +0,0 @@
-"""
-MergerTree class and member functions.
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-import numpy as np
-import os, glob, time, gc, md5, sys
-import h5py
-import types
-
-from yt.funcs import *
-
-from yt.analysis_modules.halo_finding.halo_objects import \
-    FOFHaloFinder, HaloFinder, parallelHF
-from yt.analysis_modules.halo_profiler.multi_halo_profiler import \
-    HaloProfiler
-from yt.convenience import load
-from yt.utilities.logger import ytLogger as mylog
-import yt.extern.pydot as pydot
-from yt.utilities.spatial import cKDTree
-from yt.utilities.parallel_tools.parallel_analysis_interface import \
-    ParallelDummy, \
-    ParallelAnalysisInterface, \
-    parallel_blocking_call
-
-try:
-    import sqlite3 as sql
-except ImportError:
-    mylog.error("sqlite3 not imported!")
-
-column_types = {
-"GlobalHaloID":"INTEGER",
-"SnapCurrentTimeIdentifier":"INTEGER",
-"SnapZ":"FLOAT",
-"SnapHaloID":"INTEGER",
-"HaloMass":"FLOAT",
-"NumPart":"INTEGER",
-"CenMassX":"FLOAT",
-"CenMassY":"FLOAT",
-"CenMassZ":"FLOAT",
-"BulkVelX":"FLOAT",
-"BulkVelY":"FLOAT",
-"BulkVelZ":"FLOAT",
-"MaxRad":"FLOAT",
-"ChildHaloID0":"INTEGER",
-"ChildHaloFrac0":"FLOAT",
-"ChildHaloID1":"INTEGER",
-"ChildHaloFrac1":"FLOAT",
-"ChildHaloID2":"INTEGER",
-"ChildHaloFrac2":"FLOAT",
-"ChildHaloID3":"INTEGER",
-"ChildHaloFrac3":"FLOAT",
-"ChildHaloID4":"INTEGER", 
-"ChildHaloFrac4":"FLOAT"}
-
-# In order.
-columns = ["GlobalHaloID", "SnapCurrentTimeIdentifier", "SnapZ", 
-"SnapHaloID", "HaloMass", "NumPart", "CenMassX", "CenMassY",
-"CenMassZ", "BulkVelX", "BulkVelY", "BulkVelZ", "MaxRad",
-"ChildHaloID0", "ChildHaloFrac0",
-"ChildHaloID1", "ChildHaloFrac1",
-"ChildHaloID2", "ChildHaloFrac2",
-"ChildHaloID3", "ChildHaloFrac3",
-"ChildHaloID4", "ChildHaloFrac4"]
-
-# Below we make the SQL command that creates the table "Halos" in the
-# database. This table is where all the data is stored.
-# Each column of data is named and its datatype is specified.
-# The GlobalHaloID is given the PRIMARY KEY property, which means that
-# the SQLite machinery assigns a consecutive and unique integer value
-# to that field automatically as each new entry is entered (that is,
-# if GlobalHaloID isn't specified already).
-create_db_line = "CREATE TABLE Halos ("
-for i, col in enumerate(columns):
-    if i == 0:
-        create_db_line += "%s %s PRIMARY KEY," % (col, column_types[col])
-    else:
-        create_db_line += " %s %s," % (col, column_types[col])
-# Clean of trailing comma, and closing stuff.
-create_db_line = create_db_line[:-1] + ");"
-
-NumNeighbors = 15
-NumDB = 5
-
-def minus_one():
-    return -1
-
-class DatabaseFunctions(object):
-    # Common database functions so it doesn't have to be repeated.
-    def _open_database(self):
-        # open the database. Check to make sure the database file exists.
-        if not os.path.exists(self.database):
-            mylog.error("The database file %s cannot be found. Exiting." % \
-                self.database)
-            return False
-        self.conn = sql.connect(self.database)
-        self.cursor = self.conn.cursor()
-        return True
-
-    def _close_database(self):
-        # close the database cleanly.
-        self.cursor.close()
-        self.conn.close()
-
-class MergerTree(DatabaseFunctions, ParallelAnalysisInterface):
-    r"""Build a merger tree of halos over a time-ordered set of snapshots.
-    This will run a halo finder to find the halos first if it hasn't already
-    been done. The output is a SQLite database file, which may need to
-    be stored on a different disk than the data snapshots. See the full
-    documentation for details.
-    
-    Parameters
-    ----------
-    
-    restart_files : List of strings
-        A list containing the paths to the forward time-ordered set of
-        data snapshots.
-    database : String
-        Name of SQLite database file. Default = "halos.db".
-    halo_finder_function : HaloFinder name
-        The name of the halo finder to use if halo finding is run by 
-        the merger tree. Options: HaloFinder, FOFHaloFinder, parallelHF.
-        Note that this is not a string, so no quotes. Default = HaloFinder.
-    halo_finder_threshold : Float
-        If using HaloFinder or parallelHF, the value of the density threshold
-        used when halo finding. Default = 160.0.
-    FOF_link_length : Float
-        If using FOFHaloFinder, the linking length between particles.
-        Default = 0.2.
-    dm_only : Boolean
-        When halo finding, whether to restrict to only dark matter particles.
-        Default = False.
-    refresh : Boolean
-        True forces the halo finder to run even if the halo data has been
-        detected on disk. Default = False.
-    index : Boolean
-        SQLite databases can have added to them an index which greatly
-        speeds up future queries of the database,
-        at the cost of doubling the disk space used by the file.
-        Default = True.
-
-    Examples
-    --------
-
-    >>> rf = ['/scratch/user/sim1/DD0000/data0000',
-    ... '/scratch/user/sim1/DD0001/data0001',
-    ... '/scratch/user/sim1/DD0002/data0002']
-    >>> MergerTree(rf, database = '/home/user/sim1-halos.db',
-    ... halo_finder_function=parallelHF)
-    """
-    def __init__(self, restart_files=[], database='halos.db',
-            halo_finder_function=HaloFinder, halo_finder_threshold=160.0,
-            FOF_link_length=0.2, dm_only=False, refresh=False,
-            index=True):
-        ParallelAnalysisInterface.__init__(self)
-        self.restart_files = restart_files # list of enzo restart files
-        self.with_halos = np.ones(len(restart_files), dtype='bool')
-        self.database = database # the sqlite database of haloes.
-        self.halo_finder_function = halo_finder_function # which halo finder to use
-        self.halo_finder_threshold = halo_finder_threshold # overdensity threshold
-        self.FOF_link_length= FOF_link_length # For FOF
-        self.dm_only = dm_only
-        self.refresh = refresh
-        self.index = index
-        self.zs = {}
-        # MPI stuff
-        if self.comm.rank is None:
-            self.comm.rank = 0
-        if self.comm.size is None:
-            self.comm.size = 1
-        # Get to work.
-        if self.refresh and self.comm.rank == 0:
-            try:
-                os.unlink(self.database)
-            except:
-                pass
-        if self.comm.rank == 0:
-            self._open_create_database()
-            self._create_halo_table()
-        self._run_halo_finder_add_to_db()
-        # Find the h5 file names for all the halos.
-        for snap in self.restart_files:
-            self._build_h5_refs(snap)
-        # Find out how much work is already stored in the database.
-        if self.comm.rank == 0:
-            z_progress = self._find_progress()
-        else:
-            z_progress = None
-        z_progress = self.comm.mpi_bcast(z_progress)
-        # Loop over the pairs of snapshots to locate likely neighbors, and
-        # then use those likely neighbors to compute fractional contributions.
-        last = None
-        self.write_values = []
-        self.write_values_dict = defaultdict(dict)
-        for snap, pair in enumerate(zip(self.restart_files[:-1], self.restart_files[1:])):
-            if not self.with_halos[snap] or not self.with_halos[snap+1]:
-                continue
-            if self.zs[pair[0]] > z_progress:
-                continue
-            self._find_likely_children(pair[0], pair[1])
-            # last is the data for the parent dataset, which can be supplied
-            # as the child from the previous round for all but the first loop.
-            last = self._compute_child_fraction(pair[0], pair[1], last)
-            if self.comm.rank == 0:
-                mylog.info("Updating database with parent-child relationships.")
-                self._copy_and_update_db()
-                # This has to happen because we delete the old database above.
-                self._open_create_database()
-        del last
-        if self.comm.rank == 0:
-            if self.index:
-                self._write_index()
-            self._close_database()
-        self.comm.barrier()
-        mylog.info("Done!")
-        
-    def _read_halo_lists(self):
-        self.halo_lists = []
-        for i,file in enumerate(self.halo_files):
-            hp = HaloProfiler(self.restart_files[i], halo_list_file=file)
-            self.halo_lists.append(hp.all_halos)
-
-    def _run_halo_finder_add_to_db(self):
-        for cycle, file in enumerate(self.restart_files):
-            gc.collect()
-            ds = load(file)
-            self.zs[file] = ds.current_redshift
-            self.period = ds.domain_right_edge - ds.domain_left_edge
-            # If the halos are already found, skip this data step, unless
-            # refresh is True.
-            dir = os.path.dirname(file)
-            if os.path.exists(os.path.join(dir, 'MergerHalos.out')) and \
-                    os.path.exists(os.path.join(dir, 'MergerHalos.txt')) and \
-                    glob.glob(os.path.join(dir, 'MergerHalos*h5')) is not [] and \
-                    not self.refresh:
-                pass
-            else:
-                # Run the halo finder.
-                if self.halo_finder_function == FOFHaloFinder:
-                    halos = self.halo_finder_function(ds,
-                        link=self.FOF_link_length, dm_only=self.dm_only)
-                else:
-                    halos = self.halo_finder_function(ds,
-                        threshold=self.halo_finder_threshold, dm_only=self.dm_only)
-                halos.write_out(os.path.join(dir, 'MergerHalos.out'))
-                halos.write_particle_lists(os.path.join(dir, 'MergerHalos'))
-                halos.write_particle_lists_txt(os.path.join(dir, 'MergerHalos'))
-                if len(halos) == 0:
-                    mylog.info("Dataset %s has no halos." % file)
-                    self.with_halos[cycle] = False
-                    continue
-                del halos
-            # Now add halo data to the db if it isn't already there by
-            # checking the first halo.
-            continue_check = False
-            if self.comm.rank == 0:
-                currt = ds.unique_identifier
-                line = "SELECT GlobalHaloID from Halos where SnapHaloID=0\
-                and SnapCurrentTimeIdentifier=%d;" % currt
-                self.cursor.execute(line)
-                result = self.cursor.fetchone()
-                if result != None:
-                    continue_check = True
-            continue_check = self.comm.mpi_bcast(continue_check)
-            if continue_check:
-                continue
-            red = ds.current_redshift
-            # Read the halos off the disk using the Halo Profiler tools.
-            hp = HaloProfiler(file, halo_list_file='MergerHalos.out',
-                              halo_list_format={'id':0, 'mass':1, 'numpart':2, 'center':[7, 8, 9], 'velocity':[10, 11, 12], 'r_max':13})
-            if len(hp.all_halos) == 0:
-                mylog.info("Dataset %s has no halos." % file)
-                self.with_halos[cycle] = False
-                del hp
-                continue
-            mylog.info("Entering halos into database for z=%f" % red)
-            if self.comm.rank == 0:
-                for ID,halo in enumerate(hp.all_halos):
-                    numpart = int(halo['numpart'])
-                    values = (None, currt, red, ID, halo['mass'], numpart,
-                    halo['center'][0], halo['center'][1], halo['center'][2],
-                    halo['velocity'][0], halo['velocity'][1], halo['velocity'][2],
-                    halo['r_max'] / ds['mpc'],
-                    -1,0.,-1,0.,-1,0.,-1,0.,-1,0.)
-                    # 23 question marks for 23 data columns.
-                    line = ''
-                    for i in range(23):
-                        line += '?,'
-                    # Pull off the last comma.
-                    line = 'INSERT into Halos VALUES (' + line[:-1] + ')'
-                    self.cursor.execute(line, values)
-                self.conn.commit()
-            self.comm.barrier()
-            del hp
-    
-    def _open_create_database(self):
-        # open the database. This creates the database file on disk if it
-        # doesn't already exist. Open it on root only.
-        self.conn = sql.connect(self.database)
-        self.cursor = self.conn.cursor()
-
-    def _create_halo_table(self):
-        # Handle the error if the table already exists by doing nothing.
-        try:
-            self.cursor.execute(create_db_line)
-            self.conn.commit()
-        except sql.OperationalError:
-            pass
-    
-    def _find_likely_children(self, parentfile, childfile):
-        # For each halo in the parent list, identify likely children in the 
-        # list of children.
-
-        # First, read in the locations of the child halos.
-        child_ds = load(childfile)
-        child_t = child_ds.unique_identifier
-        if self.comm.rank == 0:
-            line = "SELECT SnapHaloID, CenMassX, CenMassY, CenMassZ FROM \
-            Halos WHERE SnapCurrentTimeIdentifier = %d" % child_t
-            self.cursor.execute(line)
-            
-            mylog.info("Finding likely parents for z=%1.5f child halos." % \
-                child_ds.current_redshift)
-            
-            # Build the kdtree for the children by looping over the fetched rows.
-            # Normalize the points for use only within the kdtree.
-            child_points = []
-            for row in self.cursor:
-                child_points.append([row[1] / self.period[0],
-                row[2] / self.period[1],
-                row[3] / self.period[2]])
-            child_points = np.array(child_points)
-            kdtree = cKDTree(child_points, leafsize = 10)
-    
-        # Find the parent points from the database.
-        parent_ds = load(parentfile)
-        parent_t = parent_ds.unique_identifier
-        if self.comm.rank == 0:
-            line = "SELECT SnapHaloID, CenMassX, CenMassY, CenMassZ FROM \
-            Halos WHERE SnapCurrentTimeIdentifier = %d" % parent_t
-            self.cursor.execute(line)
-    
-            # Loop over the returned rows, and find the likely neighbors for the
-            # parents.
-            candidates = {}
-            for row in self.cursor:
-                # Normalize positions for use within the kdtree.
-                query = np.array([row[1] / self.period[0],
-                row[2] / self.period[1],
-                row[3] / self.period[2]])
-                NNtags = kdtree.query(query, NumNeighbors, period=self.period)[1]
-                nIDs = []
-                for n in NNtags:
-                    if n not in nIDs:
-                        nIDs.append(n)
-                # We need to fill in fake halos if there aren't enough halos,
-                # which can happen at high redshifts.
-                while len(nIDs) < NumNeighbors:
-                    nIDs.append(-1)
-                candidates[row[0]] = nIDs
-            del kdtree
-        else:
-            candidates = None
-
-        # Sync across tasks.
-        candidates = self.comm.mpi_bcast(candidates)
-        self.candidates = candidates
-        
-        # This stores the masses contributed to each child candidate.
-        # The +1 is an extra element in the array that collects garbage
-        # values. This is allowing us to eliminate a try/except later.
-        # This extra array element will be cut off eventually.
-        self.child_mass_arr = np.zeros(len(candidates)*NumNeighbors + 1,
-            dtype='float64')
-        # Records where to put the entries in the above array.
-        self.child_mass_loc = defaultdict(dict)
-        # Fill it out with sub-nested default dicts that point to the
-        # garbage slot, and then fill it will correct values for (possibly)
-        # related parent/child halo pairs.
-        for i,halo in enumerate(sorted(candidates)):
-            self.child_mass_loc[halo] = defaultdict(minus_one)
-            for j, child in enumerate(candidates[halo]):
-                self.child_mass_loc[halo][child] = i*NumNeighbors + j
-
-    def _build_h5_refs(self, filename):
-        # For this snapshot, add lists of file names that contain the
-        # particle info for each halo.
-        if not hasattr(self, 'h5files'):
-            self.h5files = defaultdict(dict)
-        if not hasattr(self, 'names'):
-            self.names = defaultdict(set)
-        file_ds = load(filename)
-        currt = file_ds.unique_identifier
-        dir = os.path.dirname(filename)
-        h5txt = os.path.join(dir, 'MergerHalos.txt')
-        lines = file(h5txt)
-        names = set([])
-        for i,line in enumerate(lines):
-            # Get rid of the carriage returns and turn it into a list.
-            line = line.strip().split()
-            self.h5files[currt][i] = line[1:]
-            names.update(line[1:])
-            self.names[currt].update(line[1:])
-        lines.close()
-
-    def _compute_child_fraction(self, parentfile, childfile, last):
-        # Given a parent and child snapshot, and a list of child candidates,
-        # compute what fraction of the parent halo goes to each of the children.
-        
-        parent_ds = load(parentfile)
-        child_ds = load(childfile)
-        parent_currt = parent_ds.unique_identifier
-        child_currt = child_ds.unique_identifier
-        
-        mylog.info("Computing fractional contribututions of particles to z=%1.5f halos." % \
-            child_ds.current_redshift)
-        
-        if last == None:
-            # First we're going to read in the particles, haloIDs and masses from
-            # the parent dataset.
-            parent_names = list(self.names[parent_currt])
-            parent_names.sort()
-            parent_IDs = []
-            parent_masses = []
-            parent_halos = []
-            for i,pname in enumerate(parent_names):
-                if i>=self.comm.rank and i%self.comm.size==self.comm.rank:
-                    h5fp = h5py.File(pname)
-                    for group in h5fp:
-                        gID = int(group[4:])
-                        thisIDs = h5fp[group]['particle_index'][:]
-                        thisMasses = h5fp[group]['ParticleMassMsun'][:]
-                        parent_IDs.append(thisIDs)
-                        parent_masses.append(thisMasses)
-                        parent_halos.append(np.ones(len(thisIDs),
-                            dtype='int32') * gID)
-                        del thisIDs, thisMasses
-                    h5fp.close()
-            # Sort the arrays by particle index in ascending order.
-            if len(parent_IDs)==0:
-                parent_IDs = np.array([], dtype='int64')
-                parent_masses = np.array([], dtype='float64')
-                parent_halos = np.array([], dtype='int32')
-            else:
-                parent_IDs = np.concatenate(parent_IDs).astype('int64')
-                parent_masses = np.concatenate(parent_masses).astype('float64')
-                parent_halos = np.concatenate(parent_halos).astype('int32')
-                sort = parent_IDs.argsort()
-                parent_IDs = parent_IDs[sort]
-                parent_masses = parent_masses[sort]
-                parent_halos = parent_halos[sort]
-                del sort
-        else:
-            # We can use old data and save disk reading.
-            (parent_IDs, parent_masses, parent_halos) = last
-        # Used to communicate un-matched particles.
-        parent_send = np.ones(parent_IDs.size, dtype='bool')
-
-        # Now get the child halo data.
-        child_names = list(self.names[child_currt])
-        child_names.sort()
-        child_IDs = []
-        child_masses = []
-        child_halos = []
-        for i,cname in enumerate(child_names):
-            if i>=self.comm.rank and i%self.comm.size==self.comm.rank:
-                h5fp = h5py.File(cname)
-                for group in h5fp:
-                    gID = int(group[4:])
-                    thisIDs = h5fp[group]['particle_index'][:]
-                    thisMasses = h5fp[group]['ParticleMassMsun'][:]
-                    child_IDs.append(thisIDs)
-                    child_masses.append(thisMasses)
-                    child_halos.append(np.ones(len(thisIDs),
-                        dtype='int32') * gID)
-                    del thisIDs, thisMasses
-                h5fp.close()
-        # Sort the arrays by particle index in ascending order.
-        if len(child_IDs)==0:
-            child_IDs = np.array([], dtype='int64')
-            child_masses = np.array([], dtype='float64')
-            child_halos = np.array([], dtype='int32')
-        else:
-            child_IDs = np.concatenate(child_IDs).astype('int64')
-            child_masses = np.concatenate(child_masses)
-            child_halos = np.concatenate(child_halos)
-            sort = child_IDs.argsort()
-            child_IDs = child_IDs[sort]
-            child_masses = child_masses[sort]
-            child_halos = child_halos[sort]
-            del sort
-        
-        child_send = np.ones(child_IDs.size, dtype='bool')
-        
-        # Match particles in halos.
-        self._match(parent_IDs, child_IDs, parent_halos, child_halos,
-            parent_masses, parent_send, child_send)
-
-        # Now we send all the un-matched particles to the root task for one more
-        # pass. This depends on the assumption that most of the particles do
-        # not move very much between data dumps, so that not too many particles
-        # will be dumped on the single task.
-        parent_IDs_tosend = parent_IDs[parent_send]
-        parent_masses_tosend = parent_masses[parent_send]
-        parent_halos_tosend = parent_halos[parent_send]
-        child_IDs_tosend = child_IDs[child_send]
-        child_halos_tosend = child_halos[child_send]
-        del parent_send, child_send
-        
-        parent_IDs_tosend = self.comm.par_combine_object(parent_IDs_tosend,
-                datatype="array", op="cat")
-        parent_masses_tosend = self.comm.par_combine_object(parent_masses_tosend,
-                datatype="array", op="cat")
-        parent_halos_tosend = self.comm.par_combine_object(parent_halos_tosend,
-                datatype="array", op="cat")
-        child_IDs_tosend = self.comm.par_combine_object(child_IDs_tosend,
-                datatype="array", op="cat")
-        child_halos_tosend = self.comm.par_combine_object(child_halos_tosend,
-                datatype="array", op="cat")
-
-        # Resort the received particles.
-        Psort = parent_IDs_tosend.argsort()
-        parent_IDs_tosend = parent_IDs_tosend[Psort]
-        parent_masses_tosend = parent_masses_tosend[Psort]
-        parent_halos_tosend = parent_halos_tosend[Psort]
-        Csort = child_IDs_tosend.argsort()
-        child_IDs_tosend = child_IDs_tosend[Csort]
-        child_halos_tosend = child_halos_tosend[Csort]
-        del Psort, Csort
-
-        # Now again, but only on the root task.
-        if self.comm.rank == 0:
-            self._match(parent_IDs_tosend, child_IDs_tosend,
-            parent_halos_tosend, child_halos_tosend, parent_masses_tosend)
-
-        # Now we sum up the contributions globally.
-        self.child_mass_arr = self.comm.mpi_allreduce(self.child_mass_arr)
-        
-        # Trim off the garbage collection.
-        self.child_mass_arr = self.child_mass_arr[:-1]
-        
-        if self.comm.rank == 0:
-            # Turn these Msol masses into percentages of the parent.
-            line = "SELECT HaloMass FROM Halos WHERE SnapCurrentTimeIdentifier=%d \
-            ORDER BY SnapHaloID ASC;" % parent_currt
-            self.cursor.execute(line)
-            mark = 0
-            result = self.cursor.fetchone()
-            while result:
-                mass = result[0]
-                self.child_mass_arr[mark:mark+NumNeighbors] /= mass
-                mark += NumNeighbors
-                result = self.cursor.fetchone()
-            
-            # Get the global ID for the SnapHaloID=0 from the child, this will
-            # be used to prevent unnecessary SQL reads.
-            line = "SELECT GlobalHaloID FROM Halos WHERE SnapCurrentTimeIdentifier=%d \
-            AND SnapHaloID=0;" % child_currt
-            self.cursor.execute(line)
-            baseChildID = self.cursor.fetchone()[0]
-        else:
-            baseChildID = None
-        
-        # Sync up data on all tasks.
-        self.child_mass_arr = self.comm.mpi_bcast(self.child_mass_arr)
-        baseChildID = self.comm.mpi_bcast(baseChildID)
-        
-        # Now we prepare a big list of writes to put in the database.
-        for i,parent_halo in enumerate(sorted(self.candidates)):
-            child_indexes = []
-            child_per = []
-            for j,child in enumerate(self.candidates[parent_halo]):
-                if child == -1:
-                    # Account for fake children.
-                    child_indexes.append(-1)
-                    child_per.append(0.)
-                    continue
-                # We need to get the GlobalHaloID for this child.
-                child_globalID = baseChildID + child
-                child_indexes.append(child_globalID)
-                child_per.append(self.child_mass_arr[i*NumNeighbors + j])
-            # Sort by percentages, desending.
-            child_per, child_indexes = zip(*sorted(zip(child_per, child_indexes), reverse=True))
-            values = []
-            for pair_count, pair in enumerate(zip(child_indexes, child_per)):
-                if pair_count == NumDB: break
-                values.extend([int(pair[0]), float(pair[1])])
-            #values.extend([parent_currt, parent_halo])
-            # This has the child ID, child percent listed NumDB times, followed
-            # by the currt and this parent halo ID (SnapHaloID).
-            #values = tuple(values)
-            self.write_values.append(values)
-            self.write_values_dict[parent_currt][parent_halo] = values
-
-        # Clean up.
-        del parent_IDs, parent_masses, parent_halos
-        del parent_IDs_tosend, parent_masses_tosend
-        del parent_halos_tosend, child_IDs_tosend, child_halos_tosend
-        gc.collect()
-        
-        return (child_IDs, child_masses, child_halos)
-
-    def _match(self, parent_IDs, child_IDs, parent_halos, child_halos,
-            parent_masses, parent_send = None, child_send = None):
-        # Pick out IDs that are in both arrays.
-        parent_in_child = np.in1d(parent_IDs, child_IDs, assume_unique = True)
-        child_in_parent = np.in1d(child_IDs, parent_IDs, assume_unique = True)
-        # Pare down the arrays to just matched particle IDs.
-        parent_halos_cut = parent_halos[parent_in_child]
-        child_halos_cut = child_halos[child_in_parent]
-        parent_masses_cut = parent_masses[parent_in_child]
-        # Mark the IDs that have matches so they're not sent later.
-        if parent_send is not None:
-            parent_send[parent_in_child] = False
-            child_send[child_in_parent] = False
-        # For matching pairs of particles, add the contribution of the mass.
-        # Occasionally, there are matches of particle IDs where the parent
-        # and child halos have not been identified as likely relations,
-        # and in that case loc will be returned as -1, which is the 'garbage'
-        # position in child_mass_arr. This will be trimmed off later.
-        for i,pair in enumerate(zip(parent_halos_cut, child_halos_cut)):
-            loc = self.child_mass_loc[pair[0]][pair[1]]
-            self.child_mass_arr[loc] += parent_masses_cut[i]
-        if parent_send is None:
-            mylog.info("Clean-up round matched %d of %d parents and %d children." % \
-            (parent_in_child.sum(), parent_IDs.size, child_IDs.size))
-
-    def _copy_and_update_db(self):
-        """
-        Because doing an UPDATE of a SQLite database is really slow, what we'll
-        do here is basically read in lines from the database, and then insert
-        the parent-child relationships, writing to a new DB.
-        """
-        # All of this happens only on the root task!
-        temp_name = self.database + '-tmp'
-        to_write = []
-        # Open the temporary database.
-        try:
-            os.remove(temp_name)
-        except OSError:
-            pass
-        temp_conn = sql.connect(temp_name)
-        temp_cursor = temp_conn.cursor()
-        line = "CREATE TABLE Halos (GlobalHaloID INTEGER PRIMARY KEY,\
-                SnapCurrentTimeIdentifier INTEGER, SnapZ FLOAT, SnapHaloID INTEGER, \
-                HaloMass FLOAT,\
-                NumPart INTEGER, CenMassX FLOAT, CenMassY FLOAT,\
-                CenMassZ FLOAT, BulkVelX FLOAT, BulkVelY FLOAT, BulkVelZ FLOAT,\
-                MaxRad FLOAT,\
-                ChildHaloID0 INTEGER, ChildHaloFrac0 FLOAT, \
-                ChildHaloID1 INTEGER, ChildHaloFrac1 FLOAT, \
-                ChildHaloID2 INTEGER, ChildHaloFrac2 FLOAT, \
-                ChildHaloID3 INTEGER, ChildHaloFrac3 FLOAT, \
-                ChildHaloID4 INTEGER, ChildHaloFrac4 FLOAT);"
-        temp_cursor.execute(line)
-        temp_conn.commit()
-        # Get all the data!
-        self.cursor.execute("SELECT * FROM Halos;")
-        results = self.cursor.fetchone()
-        while results:
-            results = list(results)
-            currt = results[1]
-            hid = results[3]
-            # If for some reason this halo doesn't have relationships,
-            # we'll just keep the old results the same.
-            try:
-                lookup = self.write_values_dict[currt][hid]
-                new = tuple(results[:-10] + lookup)
-            except KeyError:
-                new = tuple(results)
-            to_write.append(new)
-            results = self.cursor.fetchone()
-        # Now write to the temp database.
-        # 23 question marks for 23 data columns.
-        line = ''
-        for i in range(23):
-            line += '?,'
-        # Pull off the last comma.
-        line = 'INSERT into Halos VALUES (' + line[:-1] + ')'
-        for insert in to_write:
-            temp_cursor.execute(line, insert)
-        temp_conn.commit()
-        temp_cursor.close()
-        temp_conn.close()
-        self._close_database()
-        os.rename(temp_name, self.database)
-
-    def _write_index(self):
-        mylog.info("Creating database index.")
-        line = "CREATE INDEX IF NOT EXISTS HalosIndex ON Halos ("
-        for name in columns:
-            line += name +","
-        line = line[:-1] + ");"
-        self.cursor.execute(line)
-
-    def _find_progress(self):
-        # This queries the database to see how far along work has already come
-        # to identify parent->child relationships.
-        line = """SELECT ChildHaloID0, SnapZ from halos WHERE SnapHaloID = 0
-        ORDER BY SnapZ DESC;"""
-        self.cursor.execute(line)
-        results = self.cursor.fetchone()
-        while results:
-            results = list(results)
-            if results[0] == -1:
-                # We've hit a dump that does not have relationships. Save this.
-                return results[1] # the SnapZ.
-            results = self.cursor.fetchone()
-        return 0.
-
-class MergerTreeConnect(DatabaseFunctions):
-    r"""Create a convenience object for accessing data from the halo database.
-    
-    Parameters
-    ----------
-    database : String
-        The name of the halo database to access. Default = 'halos.db'.
-    
-    Examples
-    -------
-    >>> mtc = MergerTreeConnect('/home/user/sim1-halos.db')
-    """
-    def __init__(self, database='halos.db'):
-        self.database = database
-        result = self._open_database()
-        if not result:
-            return None
-    
-    def close(self):
-        r"""Cleanly close access to the database.
-        
-        Examples
-        --------
-        >>> mtc.close()
-        """
-        # To be more like typical Python open/close.
-        self._close_database()
-    
-    def query(self, string):
-        r"""Performs a query of the database and returns the results as a list
-        of tuples, even if the result is singular.
-        
-        Parameters
-        ----------
-        
-        string : str
-            The SQL query of the database.
-        
-        Examples
-        --------
-
-        >>> results = mtc.query("SELECT GlobalHaloID from Halos where SnapHaloID = 0 and \
-        ...    SnapZ = 0;")
-        """
-        # Query the database and return a list of tuples.
-        if string is None:
-            mylog.error("You must enter a SQL query.")
-            return None
-        items = []
-        self.cursor.execute(string)
-        results = self.cursor.fetchone()
-        while results:
-            items.append(results)
-            results = self.cursor.fetchone()
-        return items
-
-    def get_GlobalHaloID(self, SnapHaloID, z):
-        r"""Returns the GlobalHaloID for the given halo.
-        
-        Parameters
-        ----------
-
-        SnapHaloID : Integer
-            The index label for the halo of interest, equivalent to
-            the first column of the halo finder text output file.
-        z : Float
-            The redshift for the halo of interest. The value returned will be
-            for the halo with SnapHaloID equal to ID (above) with redshift
-            closest to this value.
-        
-        Examples
-        --------
-
-        >>> this_halo = mtc.get_GlobalHaloID(0, 0.)
-        """
-        string = "SELECT GlobalHaloID,SnapZ FROM Halos WHERE SnapHaloID = %d;" \
-            % SnapHaloID
-        minz = 99999.
-        # If -1 is returned, something went wrong.
-        this_halo = -1
-        self.cursor.execute(string)
-        results = self.cursor.fetchone()
-        while results:
-            if abs(results[1] - z) < minz:
-                minz = abs(results[1] - z)
-                this_halo = results[0]
-            results = self.cursor.fetchone()
-        return this_halo
-
-    def get_halo_parents(self, GlobalHaloID):
-        r"""Returns a list of the parent halos to the given halo, along with
-        the contribution fractions from parent to child.
-        
-        This function returns a list of lists, where each entry in the top list
-        is [GlobalHaloID, ChildHaloFrac] of the parent halo in relationship
-        to the given child halo.
-        
-        Parameters
-        ----------
-        GlobalHaloID : Integer
-            The GlobalHaloID of the halo of interest.
-        
-        Examples
-        --------
-        >>> parents = mtc.get_halo_parents(1688)
-        >>> print parents
-        [[1544, 0.9642857141249418],
-         [1613, 0.0],
-         [1614, 0.0],
-         [1489, 0.0],
-         [1512, 0.0],
-         [1519, 0.0],
-         [1609, 0.0]]
-        """
-        parents = []
-        for i in range(NumDB):
-            string = "SELECT GlobalHaloID, ChildHaloFrac%d FROM Halos\
-            WHERE ChildHaloID%d=%d;" % (i, i, GlobalHaloID)
-            self.cursor.execute(string)
-            results = self.cursor.fetchone()
-            while results:
-                parents.append([results[0], results[1]])
-                results = self.cursor.fetchone()
-        return parents
-
-    def get_direct_parent(self, GlobalHaloID):
-        r"""Returns the GlobalHaloID of the direct parent of the given halo.
-        
-        This is accomplished by identifying the most massive parent halo
-        that contributes at least 50% of its mass to the given halo.
-        
-        Parameters
-        ----------
-        GlobalHaloID : Integer
-            The GlobalHaloID of the halo of interest.
-        
-        Examples
-        --------
-        >>> parent = mtc.get_direct_parent(1688)
-        >>> print parent
-        1544
-        """
-        parents = self.get_halo_parents(GlobalHaloID)
-        mass = 0
-        ID = None
-        for parent in parents:
-            if parent[1] < 0.5: continue
-            info = self.get_halo_info(parent[0])
-            if info['HaloMass'] > mass:
-                mass = info['HaloMass']
-                ID = parent[0]
-        return ID
-
-    def get_halo_info(self, GlobalHaloID):
-        r"""Returns all available information for the given GlobalHaloID
-        in the form of a dict.
-        
-        Parameters
-        ----------
-        GlobalHaloID : Integer
-            The unique index for the halo of interest.
-        
-        Examples
-        --------
-        >>> info = mtc.get_halo_info(1544)
-        >>> print info
-        {'BulkVelX': -32759799.359999999,
-         'BulkVelY': -28740239.109999999,
-         'BulkVelZ': -20066000.690000001,
-         'CenMassX': 0.23059111360000001,
-         'CenMassY': 0.4061139809,
-         'CenMassZ': 0.80882763749999997,
-         'ChildHaloFrac0': 0.9642857141249418,
-         'ChildHaloFrac1': 0.0,
-         'ChildHaloFrac2': 0.0,
-         'ChildHaloFrac3': 0.0,
-         'ChildHaloFrac4': 0.0,
-         'ChildHaloID0': 1688,
-         'ChildHaloID1': 1712,
-         'ChildHaloID2': 1664,
-         'ChildHaloID3': 1657,
-         'ChildHaloID4': 1634,
-         'GlobalHaloID': 1544,
-         'HaloMass': 20934692770000.0,
-         'MaxRad': 0.01531299899,
-         'NumPart': 196,
-         'SnapCurrentTimeIdentifier': 1275946788,
-         'SnapHaloID': 56,
-         'SnapZ': 0.024169713061444002}
-        """
-        string = "SELECT * FROM Halos WHERE GlobalHaloID=%d;" % GlobalHaloID
-        d = {}
-        self.cursor.execute(string)
-        results = self.cursor.fetchone()
-        for pair in zip(columns, results):
-            d[pair[0]] = pair[1]
-        return d
-
-class Node(object):
-    def __init__(self, CoM, mass, parentIDs, z, color):
-        self.CoM = CoM
-        self.mass = mass
-        self.parentIDs = parentIDs # In descending order of contribution
-        self.z = z
-        self.color = color
-
-class Link(object):
-    def __init__(self):
-        self.childIDs = []
-        self.fractions = []
-
-class MergerTreeDotOutput(DatabaseFunctions, ParallelAnalysisInterface):
-    r"""Output the merger tree history for a given set of halo(s) in Graphviz
-    format.
-    
-    Parameters
-    ----------
-
-    halos : Integer or list of integers
-        If current_time below is not specified or is None, this is an integer
-        or list of integers with the GlobalHaloIDs of the halos to be
-        tracked. If current_time is specified, this is the SnapHaloIDs
-        for the halos to be tracked, which is identical to what is in
-        HopAnalysis.out files (for example).
-    database : String
-        The name of the database file. Default = 'halos.db'.
-    dotfile : String
-        The name of the file to write to. Default = 'MergerTree.gv'.
-        The suffix of this name gives the format of the output file,
-        so 'MergerTree.jpg' would output a jpg file. "dot -v" (from the
-        command line) will print
-        a list of image formats supported on the system. The default
-        suffix '.gv' will output the results to a text file in the Graphviz
-        markup language.
-    current_time : Integer
-        The SnapCurrentTimeIdentifier for the snapshot for the halos to
-        be tracked. This is identical to the CurrentTimeIdentifier in
-        Enzo restart files. Default = None.
-    link_min : Float
-        When establishing a parent/child relationship, this is the minimum
-        mass fraction of the parent halo contributed to
-        the child halo that will be tracked
-        while building the Graphviz file. Default = 0.2.
-    
-    Examples
-    --------
-
-    >>> MergerTreeDotOutput(halos=182842, database='/home/user/sim1-halos.db',
-    ... dotfile = 'halo-182842.gv')
-    """
-    def __init__(self, halos=None, database='halos.db',
-            dotfile='MergerTree.gv', current_time=None, link_min=0.2):
-        ParallelAnalysisInterface.__init__(self)
-        self.database = database
-        self.link_min = link_min
-        if halos is None:
-            mylog.error("Please provide at least one halo to start the tree. Exiting.")
-            return None
-        result = self._open_database()
-        if not result:
-            mylog.warn("The database did not open correctly!")
-            return None
-        if type(halos) == types.IntType:
-            halos = [halos]
-        if current_time is not None:
-            halos = self._translate_haloIDs(halos, current_time)
-        newhalos = set(halos)
-        # Create the pydot graph object.
-        self.graph = pydot.Dot('galaxy', graph_type='digraph')
-        # Build some initially empty subgraphs, which are used to identify
-        # nodes that are on the same rank (redshift).
-        line = "SELECT DISTINCT SnapZ FROM Halos;"
-        self.cursor.execute(line)
-        self.subgs = {}
-        result = self.cursor.fetchone()
-        while result:
-            self.subgs[result[0]] = pydot.Subgraph('', rank = 'same')
-            self.graph.add_subgraph(self.subgs[result[0]])
-            result = self.cursor.fetchone()
-        # For the first set of halos.
-        self._add_nodes(newhalos)
-        # Recurse over parents.
-        while len(newhalos) > 0:
-            mylog.info("Finding parents for %d children." % len(newhalos))
-            newhalos = self._find_parents(newhalos)
-            self._add_nodes(newhalos)
-        self._write_dotfile(dotfile)
-        return None
-
-    def _translate_haloIDs(self, halos, current_time):
-        # If the input is in the haloID equivalent to SnapHaloID, translate them
-        # to GlobalHaloIDs.
-        new_haloIDs=[]
-        for halo in halos:
-            line = "SELECT GlobalHaloID FROM Halos WHERE SnapHaloID=? AND \
-            SnapCurrentTimeIdentifier=? limit 1;"
-            values = (halo, current_time)
-            self.cursor.execute(line, values)
-            new_haloIDs.append(self.cursor.fetchone()[0])
-        return new_haloIDs
-        
-    def _find_parents(self, halos):
-        # Given a set of halos, find their parents and add that to each of their
-        # node records. At the same time, make a link record for that
-        # relationship.
-        # This stores the newly discovered parent halos.
-        newhalos = set([])
-        for halo in halos:
-            line = "SELECT GlobalHaloID, ChildHaloFrac0,\
-                ChildHaloFrac1, ChildHaloFrac2,ChildHaloFrac3, ChildHaloFrac4,\
-                ChildHaloID0, ChildHaloID1, ChildHaloID2, \
-                ChildHaloID3, ChildHaloID4 \
-                FROM Halos WHERE\
-                ChildHaloID0=? or ChildHaloID1=? or ChildHaloID2=? or\
-                ChildHaloID3=? or ChildHaloID4=?;"
-            values = (halo, halo, halo, halo, halo)
-            self.cursor.execute(line, values)
-            result = self.cursor.fetchone()
-            while result:
-                res = list(result)
-                pID = result[0]
-                dsracs = res[1:6]
-                cIDs = res[6:11]
-                for pair in zip(cIDs, dsracs):
-                    if pair[1] <= self.link_min or pair[0] != halo:
-                        continue
-                    else:
-                        self.graph.add_edge(pydot.Edge(pID, halo,
-                        label = "%3.2f%%" % float(pair[1]*100),
-                        color = "blue", 
-                        fontsize = "10"))
-                        newhalos.add(pID)
-                result = self.cursor.fetchone()
-        return newhalos
-    
-    def _add_nodes(self, newhalos):
-        # Each call of this function always happens for a set of newhalos that
-        # are at the same z. To give the halos color we will figure out how
-        # many halos total were found this z.
-        # There's probably a way to do this with only one SQL operation.
-        if len(newhalos) == 0:
-            return
-        ahalo = list(newhalos)[0]
-        line = 'SELECT SnapCurrentTimeIdentifier FROM Halos WHERE GlobalHaloID=?;'
-        values = (ahalo,)
-        self.cursor.execute(line, values)
-        result = self.cursor.fetchone()
-        # Use currt to get the number.
-        line = 'SELECT max(SnapHaloID) FROM Halos where SnapCurrentTimeIdentifier=?;'
-        values = (result[0],)
-        self.cursor.execute(line, values)
-        maxID = self.cursor.fetchone()[0]
-        # For the new halos, create nodes for them.
-        for halo in newhalos:
-            line = 'SELECT SnapZ, HaloMass, CenMassX, CenMassY, CenMassZ,\
-            SnapHaloID FROM Halos WHERE GlobalHaloID=? limit 1;'
-            value = (halo,)
-            self.cursor.execute(line, value)
-            result = self.cursor.fetchone()
-            # Add the node to the pydot graph.
-            color_float = 1. - float(result[5])/(maxID+1)
-            self.graph.add_node(pydot.Node(halo,
-                label = "{%1.3e\\n(%1.3f,%1.3f,%1.3f)}" % \
-                (result[1], result[2], result[3], result[4]),
-                shape = "record",
-                color = "%0.3f 1. %0.3f" % (color_float, color_float)))
-            # Add this node to the correct subgraph.
-            self.subgs[result[0]].add_node(pydot.Node(halo))
-            # If this was the first node added to this subgraph, also add
-            # the lone node for the redshift value.
-            if len(self.subgs[result[0]].get_node_list()) == 1:
-                self.subgs[result[0]].add_node(pydot.Node("%1.5e" % result[0],
-                label = "%1.5f" % result[0],
-                shape = "record", color = "green"))
-
-    def _write_dotfile(self, dotfile):
-        # Based on the suffix of the file name, write out the result to a file.
-        suffix = dotfile.split(".")[-1]
-        if suffix == "gv": suffix = "raw"
-        mylog.info("Writing %s format %s to disk." % (suffix, dotfile))
-        self.graph.write("%s" % dotfile, format=suffix)
-
-class MergerTreeTextOutput(DatabaseFunctions, ParallelAnalysisInterface):
-    r"""Dump the contents of the merger tree database to a text file.
-    This is generally not recommended.
-    
-    Parameters
-    ----------
-    database : String
-        Name of the database to access. Default = 'halos.db'.
-    outfile : String
-        Name of the file to write to. Default = 'MergerTreeDB.txt'.
-    
-    Examples
-    --------
-    >>> MergerTreeTextOutput(database='/home/user/sim1-halos.db',
-    ... outfile='halos-db.txt')
-    """
-    def __init__(self, database='halos.db', outfile='MergerTreeDB.txt'):
-        ParallelAnalysisInterface.__init__(self)
-        self.database = database
-        self.outfile = outfile
-        result = self._open_database()
-        if not result:
-            mylog.warn("Database file not read correctly!")
-            return None
-        self._write_out()
-        self._close_database()
-        return None
-    
-    def _write_out(self):
-        # Essentially dump the contents of the database into a text file.
-        fp = open(self.outfile, "w")
-        # Make the header line.
-        spacing = {}
-        for column in columns:
-            spacing[column] = (max(15,len(column)+1))
-        line = "# "
-        for column in columns:
-            line += "%s" % column.ljust(spacing[column])
-        line += "\n"
-        fp.write(line)
-        # Get the data.
-        line = "SELECT * FROM Halos ORDER BY SnapZ DESC, SnapHaloID ASC;"
-        self.cursor.execute(line)
-        results = self.cursor.fetchone()
-        # Write out the columns.
-        while results:
-            line = "  "
-            for i,column in enumerate(columns):
-                if column_types[column] == "FLOAT":
-                    this = "%1.6e" % results[i]
-                    line += this.ljust(spacing[column])
-                if column_types[column] == "INTEGER":
-                    this = "%d" % results[i]
-                    line += this.ljust(spacing[column])
-            line += "\n"
-            fp.write(line)
-            results = self.cursor.fetchone()
-        fp.close()
-        

diff -r e97899b530819bec400dbe2127aba867d227dc0b -r 0c505346d53132d4d13791493c3413d4be69afde yt/analysis_modules/halo_merger_tree/setup.py
--- a/yt/analysis_modules/halo_merger_tree/setup.py
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env python
-import setuptools
-import os
-import sys
-import os.path
-
-
-def configuration(parent_package='', top_path=None):
-    from numpy.distutils.misc_util import Configuration
-    config = Configuration('halo_merger_tree', parent_package, top_path)
-    config.make_config_py()  # installs __config__.py
-    #config.make_svn_version_py()
-    return config


https://bitbucket.org/yt_analysis/yt/commits/950c0a524631/
Changeset:   950c0a524631
Branch:      yt-3.0
User:        brittonsmith
Date:        2014-07-24 01:42:01
Summary:     Updating halo merger tree docs.
Affected #:  3 files

diff -r 0c505346d53132d4d13791493c3413d4be69afde -r 950c0a524631f152732f8df6bda4dbe1fa572e98 doc/source/analyzing/analysis_modules/halo_analysis.rst
--- a/doc/source/analyzing/analysis_modules/halo_analysis.rst
+++ b/doc/source/analyzing/analysis_modules/halo_analysis.rst
@@ -11,4 +11,5 @@
    halo_transition
    halo_finding
    halo_mass_function
+   halo_merger_tree
    halo_analysis_example

diff -r 0c505346d53132d4d13791493c3413d4be69afde -r 950c0a524631f152732f8df6bda4dbe1fa572e98 doc/source/analyzing/analysis_modules/halo_merger_tree.rst
--- /dev/null
+++ b/doc/source/analyzing/analysis_modules/halo_merger_tree.rst
@@ -0,0 +1,6 @@
+.. _merger_tree:
+
+Halo Merger Tree
+================
+
+.. note:: As of :code:`yt-3.0`, the halo merger tree functionality has been removed and will be replaced by machinery that works with the ``HaloCatalog`` object.  In the meantime, this functionality can still be found in :code:`yt-2.x`.

diff -r 0c505346d53132d4d13791493c3413d4be69afde -r 950c0a524631f152732f8df6bda4dbe1fa572e98 doc/source/analyzing/analysis_modules/merger_tree.rst
--- a/doc/source/analyzing/analysis_modules/merger_tree.rst
+++ /dev/null
@@ -1,767 +0,0 @@
-.. _merger_tree:
-
-Halo Merger Tree
-================
-
-.. note:: At the moment the merger tree is not yet implemented using new 
-    halo catalog functionality. 
-
-The Halo Merger Tree extension is capable of building a database of halo mergers
-over a set of time-ordered Enzo datasets. The fractional contribution of older
-'parent' halos to younger 'child' halos is calculated by comparing the unique
-index labels of their constituent particles. The data is stored in a
-`SQLite <http://sqlite.org/>`_ database which enables the use of powerful
-and fast SQL queries over all the halos.
-
-General Overview
-----------------
-
-The first requirement is a set of sequential datasets.
-The detail of the merger tree is increased as the difference in
-time between snapshots is reduced, at the cost of higher computational effort
-for the tree itself and disk usage for the snapshots.
-The merger tree relies on the output of one of the Halo Finders in yt, and the
-user can choose which one to use.
-The merger tree is capable of running the halo finder if it hasn't already
-been done.
-Once halo finding is accomplished for all the data snapshots, the halo
-lineage is calculated by comparing the particle membership of halos between
-pairs of time steps.
-The halo data and tree data are stored in the SQLite database.
-
-Clearly, another requirement is that Python has the
-`sqlite3 library <http://docs.python.org/library/sqlite3.html>`_
-installed.
-This should be built along with everything else yt needs
-if the ``install_script.sh`` was used.
-
-The merger tree can be calculated in parallel, and if necessary, it will run
-the halo finding in parallel as well. Please see the note below about the
-special considerations needed for Network File Systems.
-
-There is a convenience-wrapper for querying the database, called
-``MergerTreeConnect``.
-It simplifies accessing data in the database.
-
-There are two output classes for the merger tree. The ``MergerTreeDotOutput`` class
-outputs the tree for a user-specified subset of halos to a
-`Graphviz format <http://graphviz.org/>`_ file.
-Graphviz is an open-source package for visualizing connected objects in a
-graphical way.
-There are binary distributions for all major operating systems.
-It is also possible to dump the contents of the SQLite database to a simple text file
-with the ``MergerTreeTextOutput`` class.
-The data is saved in columnar format.
-
-Conceptual Primer
-~~~~~~~~~~~~~~~~~
-
-The best way to view the merger tree extension is as a two-part process.
-First, the merger tree is built and stored in the database.
-This process can be quite time consuming, depending on the size of the simulation,
-and the number and size of halos found in the snapshots.
-This is not a process one wants to do very often, which is why it is separate
-from the analysis parts.
-
-The second part is actually a many-part process, which is the analysis of the
-merger tree itself.
-The first step is computationally intensive, but the analysis step
-is user-intensive.
-The user needs to decide what to pull out of the merger tree
-and figure out how to extract the needed data with SQL statements.
-Once an analysis pipeline is written, it should run very fast for even
-very large databases.
-
-A Note About Network File Systems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Accessing a SQLite database stored on a Network (or Distributed) File System (NFS)
-is a risky thing to do, particularly if more than one task wants to write
-at the same time (`see more here <http://www.sqlite.org/lockingv3.html#how_to_corrupt>`_).
-NFS disks can store files on multiple physical hard drives, and it can take time
-for changes made by one task to appear to all the parallel tasks.
-Only one task of the merger tree ever interacts with the database,
-so these dangers are minimal,
-but in general it's a good idea to know something about the disk used to
-store the database.
-
-In general, it is recommended to keep the database on a 'real disk' 
-(/tmp for example, if all the tasks are on the same SMP node,
-or RAM disk for extra speed) if possible,
-but it should work on a NFS disk as well.
-If a temporary disk is used to store the database while it's being built,
-remember to copy the file to a permanent disk after the merger tree script
-is finished.
-
-
-Running and Using the Halo Merger Tree
---------------------------------------
-
-It is very simple to create a merger tree database for a series of snapshots.
-The most difficult part is creating an ordered list of Enzo restart files.
-There are two ways to do it, by hand or with the EnzoSimulation extension.
-
-By Hand
-~~~~~~~
-
-Here is an example of how to build the list and build the database by hand.
-Here, the snapshots are stored in directories named DD????, and the enzo
-restart file named data????, where ???? is a four digit zero-padded integer.
-The final snapshot considered (most progressed in time) is DD0116,
-and the earliest that will be examined is DD0100.
-The database will be saved to ``/path/to/database/halos.db``.
-This example below works identically in serial or in parallel.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  from yt.analysis_modules.halo_finding.api import *
-
-  files = []
-  start = 100
-  finish = 116
-  for i in range(start, finish + 1):
-      files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
-  MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-If the halos have not been found previously for the snapshots, the halo finder
-will be run automatically. See the note about this below.
-
-Using EnzoSimulation
-~~~~~~~~~~~~~~~~~~~~
-
-Here is how to build the input list of restart files using the EnzoSimulation
-extension.
-It is possible to set range and interval between snapshots.
-Please see the EnzoSimulation
-documentation (:ref:`analyzing-an-entire-simulation`) for details.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  from yt.analysis_modules.halo_finding.api import *
-  import yt.analysis_modules.simulation_handler.api as ES
-  
-  es = ES.EnzoSimulation('/path/to/snapshots/simulation.par')
-  
-  files = []
-  for output in es.allOutputs:
-      files.append(output['filename'])
-
-  MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-Merger Tree Parallelism
------------------------
-
-If the halos are to be found during the course of building the merger tree,
-run with a number of tasks appropriate to the size of the dataset and the
-halo finder used.
-The speed of the merger tree itself,
-which compares halo membership in parallel very effectively,
-is almost completely constrained by the read/write times of the SQLite file.
-In tests with the halos pre-located, there is not much speedup beyond two MPI tasks.
-There is no negative effect with running the merger tree with more tasks (which is
-why if halos are to be found by the merger tree, the merger tree should be
-run with as many tasks as that step requires), and indeed if the simulation
-is a large one, running in parallel does provide memory parallelism,
-which is important.
-
-How The Database Is Handled In Analysis Restarts
-------------------------------------------------
-
-The Merger Tree is designed to allow the merger tree database to be built
-incrementally.
-For example, if a simulation is currently being run, the merger
-tree database can be built for the available datasets, and when new ones are
-created, the database extended to include them.
-So if there are going to be
-60 data snapshots total (indexed (0, 1, 2, ..., 59)), and only 50 are saved when the
-tree is first built, the analysis should be done on datasets [0, 49].
-If the last ten become available, re-run the merger tree on datasets [49, 59]
-referencing the same database as before.
-By referencing the same database as before, work does not need to be repeated.
-
-If the merger tree process is interrupted before completion (say, if the 
-job's walltime is exceeded and the scheduler kills it), just run the exact
-same job again.
-The merger tree will check to see what work has already been completed, and
-resume where it left off.
-
-Additional Parameters
-~~~~~~~~~~~~~~~~~~~~~
-
-When calling ``MergerTree``, the following parameters control how the
-halo finder is run, if it needs to be run, and how the database is built.
-
-  * ``halo_finder_function`` (name) - Which of the halo finders (:ref:`halo_finding`)
-    to use. Default: ``HaloFinder`` (HOP).
-  * ``halo_finder_threshold`` (float) - When using HOP or Parallel HOP, this sets the
-    threshold used. Default: 80.0.
-  * ``FOF_link_length`` (float) - When using Friends of Friends (FOFHaloFinder), this sets
-    the inter-particle link length used. Default: 0.2.
-  * ``dm_only`` (bool) - Whether to include stars (False), or only the dark
-    matter particles when building halos (True).
-    Default: False.
-  * ``refresh`` (bool) - If set to True, this will run the halo finder and
-    rebuild the database regardless of whether or not the halo files or
-    database exist on disk already.
-    Default: False.
-  * ``index`` (bool) - Whether to add an index to the SQLite file. True makes
-    SQL searches faster at the cost of additional disk space. Default: True.
-
-Example using Parallel HOP:
-
-.. code-block:: python
-
-  MergerTree(restart_files=files, database='/path/to/database/halos.db',
-      halo_finder_function=parallelHF, halo_finder_threshold=100.)
-
-Pre-Computing Halos
-~~~~~~~~~~~~~~~~~~~
-
-If halo finding is to happen before the merger tree is calculated, and the
-work is not to be wasted, special care
-should be taken to ensure that all the data required for the merger tree is
-saved.
-By default, the merger tree looks for files that begin with the name ``MergerHalos``
-in the same directory as each Enzo restart file,
-and if those files are missing or renamed, halo finding will be performed again.
-If ``halos`` is the list of halos returned by the halo finder, these three
-commands should be called to save the needed data:
-
-.. code-block:: python
-
-  halos.write_out('MergerHalos.out')
-  halos.write_particle_lists('MergerHalos')
-  halos.write_particle_lists_txt('MergerHalos')
-
-There is a convenience function that will call the three functions above
-at one time:
-
-.. code-block:: python
-
-  halos.dump('MergerHalos')
-
-Please see the documents on halo finding for more information on what these
-commands do (:ref:`halo_finding`).
-
-Accessing Data in the Database
-------------------------------
-
-SQLite databases support nearly all of the standard SQL queries.
-It is possible to write very complicated and powerful SQL queries, but below
-only simple examples are shown. Please see other resources (WWW, books) for
-more on how to write SQL queries.
-
-It is possible to read and modify a SQLite database from the command line using
-the ``sqlite3`` command (e.g. ``sqlite3 database.db``). It can be very convenient
-to use this to quickly inspect a database, but is not suitable for extracting or inserting
-large amounts of data. There are many examples (again, see the WWW or books)
-available on how to use the command line ``sqlite3`` command.
-
-The table containing halo data in the database is named 'Halos'.
-All queries for halo data will come from this table.
-The table has these columns:
-
-  #. ``GlobalHaloID`` (int) - A fully-unique identifier for the halo.
-  #. ``SnapCurrentTimeIdentifier`` (int) - A unique time identifier for the snapshot
-     the halo comes from. Equivalent to 'CurrentTimeIdentifier' from the Enzo
-     restart file.
-  #. ``SnapZ`` (float) - The redshift for the halo.
-  #. ``SnapHaloID`` (int) - The halo ID for the halo taken from the output of the
-     halo finder (i.e. 'halos.write_out("HopAnalysis.out")'). It is unique for halos
-     in the same snapshot, but not unique across the full database.
-  #. ``HaloMass`` (float) - The total mass of dark matter in the halo as
-     identified by the halo finder.
-  #. ``NumPart`` (int) - Number of dark matter particles in the halo as identified
-     by the halo finder.
-  #. ``CenMassX``, 
-  #. ``CenMassY``,
-  #. ``CenMassZ`` (float) - The location of the center of mass of the halo in code units.
-  #. ``BulkVelX``,
-  #. ``BulkVelY``,
-  #. ``BulkVelZ`` (float) - The velocity of the center of mass of the halo in
-     cgs units.
-  #. ``MaxRad`` (float) - The distance from the center of mass to the most
-     remote particle in the halo in code units.
-  #. ``ChildHaloID0`` (int) - The GlobalHaloID of the child halo which receives
-     the greatest proportion of particles from this halo.
-  #. ``ChildHaloFrac0`` (float) - The fraction by mass of particles from this
-     (parent) halo that goes to the child halo recorded in ChildHaloID0.  If
-     all the particles from this parent halo go to ChildHaloID0, this number will
-     be 1.0, regardless of the mass of the child halo.
-  #. ``ChildHaloID[1-4]``, ``ChildHaloFrac[1-4]`` (int, float) - Similar to the
-     columns above, these store the second through fifth greatest recipients of
-     particle mass from this parent halo.
-
-.. warning::
-
-   A value of -1 in any of the ``ChildHaloID`` columns corresponds to
-   a fake (placeholder) child halo entry. There is no halo with an ID equal to -1.
-   This is used during the merger tree construction,
-   and must be accounted for when constructing SQL queries of the database.
-
-To get the data for the most massive halo at the end of the simulation,
-there is a convenience class that simplifies database access. Using it, a query
-might look like this:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-
-  mtc = MergerTreeConnect(database='halos.db')
-  line = "SELECT * FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
-  results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing the values for that halo in
-the same order as
-given above for the columns.
-
-Another way to get the same information is to use one of the convenience functions.
-The following example shows how to do this:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-
-  mtc = MergerTreeConnect(database='halos.db')
-  this_halo = mtc.get_GlobalHaloID(0, 0.0)
-
-The first term of ``get_GlobalHaloID`` is the ``SnapHaloID`` for the halo of
-interest, and the second is the redshift of interest.
-The results are stored in ``this_halo`` as an integer.
-
-If all that is wanted is a few of the columns, this slight modification below
-will retrieve only the desired data. In general, it is a good idea to retrieve
-only the columns that will actually be used. Requesting all the columns (with
-``*``) requires more reads from disk and slows down the query.
-
-.. code-block:: python
-
-  line = "SELECT NumPart, GlobalHaloID FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
-  results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing two items, the values for 
-``NumPart`` first and ``GlobalHaloID`` second.
-
-There is also a convenience function that will retrieve all the data columns
-for a given halo.
-The input of the function is the ``GlobalHaloID`` for the
-halo of interest, and it returns a dictionary where the keys are the names
-of the data columns, and the values are the entries in the database.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-
-  mtc = MergerTreeConnect(database='halos.db')
-  info = mtc.get_halo_info(1544)
-  print info
-  {'BulkVelX': -32759799.359999999,
-   'BulkVelY': -28740239.109999999,
-   'BulkVelZ': -20066000.690000001,
-   'CenMassX': 0.23059111360000001,
-   'CenMassY': 0.4061139809,
-   'CenMassZ': 0.80882763749999997,
-   'ChildHaloFrac0': 0.9642857141249418,
-   'ChildHaloFrac1': 0.0,
-   'ChildHaloFrac2': 0.0,
-   'ChildHaloFrac3': 0.0,
-   'ChildHaloFrac4': 0.0,
-   'ChildHaloID0': 1688,
-   'ChildHaloID1': 1712,
-   'ChildHaloID2': 1664,
-   'ChildHaloID3': 1657,
-   'ChildHaloID4': 1634,
-   'GlobalHaloID': 1544,
-   'HaloMass': 20934692770000.0,
-   'MaxRad': 0.01531299899,
-   'NumPart': 196,
-   'SnapCurrentTimeIdentifier': 1275946788,
-   'SnapHaloID': 56,
-   'SnapZ': 0.024169713061444002}
-
-If data from more than one halo is desired, more than one item will be returned.
-This query will find the largest halo from each of the snapshots.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0;"
-  results = mtc.query(line)
-
-``results`` is a list of multiple two-tuples.
-Note that SQLite doesn't return the values in any
-particular order. If order is unimportant, it saves time. But if order is
-important, you can modify the query to sort the results by redshift.
-
-.. code-block:: python
-
-  line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0 ORDER BY SnapZ DESC;"
-
-Now ``results`` will be ordered by time, first to last, for each two-tuple
-in the list.
-
-The function ``get_halo_parents()`` will return all the halos that are
-identified as parents of the specified halo.
-Due to the way that the halo tree is constructed, it will also return parent
-halos that have zero mass contribution to the specified halo.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  parents = mtc.get_halo_parents(1688)
-  print parents
-  [[1544, 0.9642857141249418],
-   [1613, 0.0],
-   [1614, 0.0],
-   [1489, 0.0],
-   [1512, 0.0],
-   [1519, 0.0],
-   [1609, 0.0]]
-
-The last example shows the kernel of the most important operation for a
-merger tree: recursion back in time to find progenitors for a halo. Using a 
-query similar to ones above, the ``GlobalHaloID`` is found for the halo of
-interest at some late point in time (z=0, typically). Using that value (given
-the random-ish value of 1234567),
-the halos that came before can be identified very easily:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-
-  lineage = {}
-  # Recursive function on parent halos.
-  def findParent(haloID, lineage):
-      line = "SELECT GlobalHaloID from Halos where ChildHaloID0=%d;" % haloID
-      results = mtc.query(line)
-      if results == []:
-          return lineage
-      # A one-tuple inside a list.
-      parentID = results[0][0]
-      lineage[parentID] = haloID
-      # Now we recurse back in time.
-      return findParent(parentID, lineage)
-
-  # Stores the parent->child relationships.
-  lineage = {}
-  # Call the function once with the late halo.
-  lineage = findParent(1234567, lineage)
-
-Contained within the dict ``lineage`` is the primary lineage for the final
-chosen halo. Storing the family tree in this way may not be the best choice,
-but this makes it clear how easy it is to build up the history of a halo
-over time.
-
-Merger Tree Convenience Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Below are some examples of the convenience functions available.
-
-**get_GlobalHaloID(SnapHaloID, z)**. Returns the GlobalHaloID for the
-given halo.::
-
-        
-    Parameters
-    ----------
-    SnapHaloID : Integer
-        The index label for the halo of interest, equivalent to
-        the first column of the halo finder text output file.
-    z : Float
-        The redshift for the halo of interest. The value returned will be
-        for the halo with SnapHaloID equal to ID (above) with redshift
-        closest to this value.
-    
-    Examples
-    --------
-    >>> this_halo = mtc.get_GlobalHaloID(0, 0.)
-
-**get_halo_parents(GlobalHaloID)**. Returns a list of the parent halos to the
-given halo, along with the contribution fractions from parent to child.
-This function returns a list of lists, where each entry in the top list
-is [GlobalHaloID, ChildHaloFrac] of the parent halo in relationship
-to the given child halo.::
-        
-    Parameters
-    ----------
-    GlobalHaloID : Integer
-        The GlobalHaloID of the halo of interest.
-    
-    Examples
-    --------
-    >>> parents = mtc.get_halo_parents(1688)
-    >>> print parents
-    [[1544, 0.9642857141249418],
-     [1613, 0.0],
-     [1614, 0.0],
-     [1489, 0.0],
-     [1512, 0.0],
-     [1519, 0.0],
-     [1609, 0.0]]
-
-**get_direct_parent(GlobalHaloID)**. Returns the GlobalHaloID of the direct
-parent of the given halo.
-This is accomplished by identifying the most massive parent halo
-that contributes at least 50% of its mass to the given halo.::
-        
-    Parameters
-    ----------
-    GlobalHaloID : Integer
-        The GlobalHaloID of the halo of interest.
-    
-    Examples
-    --------
-    >>> parent = mtc.get_direct_parent(1688)
-    >>> print parent
-    1544
-
-**get_halo_info(GlobalHaloID)**. Returns all available information for
-the given GlobalHaloID in the form of a dict.::
-        
-    Parameters
-    ----------
-    GlobalHaloID : Integer
-        The unique index for the halo of interest.
-    
-    Examples
-    --------
-    >>> info = mtc.get_halo_info(1544)
-    >>> print info
-    {'BulkVelX': -32759799.359999999,
-     'BulkVelY': -28740239.109999999,
-     'BulkVelZ': -20066000.690000001,
-     'CenMassX': 0.23059111360000001,
-     'CenMassY': 0.4061139809,
-     'CenMassZ': 0.80882763749999997,
-     'ChildHaloFrac0': 0.9642857141249418,
-     'ChildHaloFrac1': 0.0,
-     'ChildHaloFrac2': 0.0,
-     'ChildHaloFrac3': 0.0,
-     'ChildHaloFrac4': 0.0,
-     'ChildHaloID0': 1688,
-     'ChildHaloID1': 1712,
-     'ChildHaloID2': 1664,
-     'ChildHaloID3': 1657,
-     'ChildHaloID4': 1634,
-     'GlobalHaloID': 1544,
-     'HaloMass': 20934692770000.0,
-     'MaxRad': 0.01531299899,
-     'NumPart': 196,
-     'SnapCurrentTimeIdentifier': 1275946788,
-     'SnapHaloID': 56,
-     'SnapZ': 0.024169713061444002}
-
-
-Merger Tree Output
-------------------
-
-There are two included methods for outputting the contents of a Merger Tree
-database: Graphviz and plain-text columnar format.
-
-Graphviz Output
-~~~~~~~~~~~~~~~
-
-The `Graphviz <http://graphviz.org/>`_ output function can write the merger
-tree to a text file, which can then be parsed by the GraphViz executable
-``dot`` into an image, or an image can be created directly.
-The GraphViz engine used to parse the
-output is the ``dot`` engine, which produces hierarchical diagrams where
-directionality (such as left to right or top to bottom)
-indicates some meaningful property.
-In the case of the merger tree, top to bottom indicates the progress of
-time.
-Graphviz can output the visualization into a wide range of image and vector
-formats suitable for any application.
-
-Below is a simple example of the Graphviz/dot visualization.
-Each box contains the mass of the halo (in Msun), and the center of mass
-for the halo in simulation units.
-For each snapshot, the box for the largest halo is colored red.
-The numbers next to the link arrows give the percentage of the parent
-halo's mass that goes to the child.
-On each row, the un-linked black boxes
-contain the redshift for that snapshot.
-
-.. image:: _images/merger_tree_ex.png
-   :width: 400
-   :height: 438
-
-To output the merger tree for a set of halos, the chosen halos need to be
-identified. There are two choices, either the ``GlobalHaloID`` or
-the ``SnapHaloID`` along with the ``SnapCurrentTimeIdentifier`` value
-for the chosen halo(s) may be used.
-Two bits of information need to be used if ``GlobalHaloID`` is not specified
-because ``SnapHaloID`` is not a unique identifier in the database.
-The reason why ``SnapCurrentTimeIdentifier`` is used rather than ``SnapZ`` has
-to do with the float valuation of the redshift column and the way SQL queries
-work.
-If ``SnapZ`` were used, the precise float value of the desired redshift would
-have to be used, rather than the simpler-to-get-correct integer value of
-``SnapCurrentTimeIdentifier``.
-
-Luckily it isn't as hard as it sounds to get the ``GlobalHaloID`` for the
-desired halo(s).
-By using the ``MergerTreeConnect`` class, it is simple to pick out halos
-before creating the Graphviz output.
-Below, the ``GlobalHaloID`` for the most massive halo in the last (z~0, typically)
-snapshot is found:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  
-  line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
-  results = mtc.query(line)
-  print results
-
-Because of the way the database is created, from early times to late, the most
-massive halo at z~0 will have the largest ``GlobalHaloID`` for all halos with
-``SnapHaloID``=0. ``results`` will contain a one-tuple in a list of the
-desired ``GlobalHaloID``.
-
-Alternatively, one of the convenience functions can be used which may be easier:
-
-.. code-block:: python
-
-  from yt.mods import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  
-  thisHalo = mtc.get_GlobalHaloID(0, 0.0)
-
-``thisHalo`` will be an integer giving the GlobalHaloID for the most massive
-halo (ID=0) at z=0.0.
-
-To output the merger tree for the five largest halos in the last snapshot,
-it may be simplest to find the ``SnapCurrentTimeIdentifier`` for that
-snapshot.
-This can either be done by referencing the dataset itself by hand
-(look for ``CurrentTimeIdentifier`` in the Enzo restart file), or by querying
-the database.
-Here is how to query the database for the right information:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  
-  line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
-  results = mtc.query(line)
-  
-  line = "SELECT SnapCurrentTimeIdentifier FROM Halos WHERE GlobalHaloID=%d;" % results[0][0]
-  results = mtc.query(line)
-  print results
-
-``results`` contains a one-tuple in a list of the desired
-``SnapCurrentTimeIdentifier``.
-Supposing that the desired ``SnapCurrentTimeIdentifier`` is 72084721, outputting
-merger trees is now simple:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeDotOutput(halos=[0,1,2,3,4], database='halos.db',
-      dotfile='MergerTree.gv', current_time=72084721)
-
-This will output the file ``MergerTree.gv`` which can be parsed by Graphviz.
-To output to an image format, name the file appropriately (``MergerTree.png``).
-A list of available GraphViz image formats can be found by invoking
-(from the command line) ``dot -v``.
-
-If the ``GlobalHaloID`` values are known for all of the desired halos,
-``current_time`` should not be specified, as below:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
-      dotfile='MergerTree.gv', link_min=0.7)
-
-The ``link_min`` parameter above limits the tree to following links between
-parent and child halos for which at least 70% of the parent halo's mass goes
-to the child. The default is 0.2.
-
-In this slightly modified example below, if ``dot`` is installed in the
-``PATH``, an image file will be created without an intermediate text file:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
-      dotfile='MergerTree.png', link_min=0.7)
-
-
-Plain-Text Output
-~~~~~~~~~~~~~~~~~
-
-This is how to output the entire contents of the database to a text file:
-
-.. code-block:: python
-
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeTextOutput(database='halos.db', outfile='MergerTreeDB.txt')
-
-Putting it All Together
------------------------
-
-Here is an example of how to create a merger tree for the most massive halo
-in the final snapshot from start to finish, and output the Graphviz
-visualization as a PDF file.
-This will work in serial and in parallel.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  from yt.analysis_modules.halo_finding.api import *
-
-  # Pick our snapshots to use.
-  files = []
-  start = 100
-  finish = 116
-  for i in range(start, finish + 1):
-      files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
-  my_database = '/path/to/database/halos.db'
-
-  # Build the tree.
-  MergerTree(restart_files=files, database=my_database)
-  
-  # Get the GlobalHaloID for the halo.
-  mtc = MergerTreeConnect(database=my_database)
-  my_halo = mtc.get_GlobalHaloID(0, 0.0)
-  
-  # Output the tree as a PDF file.
-  MergerTreeDotOutput(halos=my_halo, database=my_database, link_min=0.5,
-      dotfile='MergerTree.pdf')
-
-
-  


https://bitbucket.org/yt_analysis/yt/commits/d0a81e92ad2b/
Changeset:   d0a81e92ad2b
Branch:      yt-3.0
User:        brittonsmith
Date:        2014-07-24 01:44:26
Summary:     Removing merger tree from api docs.
Affected #:  1 file

diff -r 950c0a524631f152732f8df6bda4dbe1fa572e98 -r d0a81e92ad2ba5c85c7bafa367314b9c92578a5a doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -439,13 +439,6 @@
 
 You can use Halo catalogs generated externally as well:
 
-.. autosummary::
-   :toctree: generated/
-
-   ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.HaloCatalog
-   ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.EnzoFOFMergerTree
-   ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.plot_halo_evolution
-
 Two Point Functions
 ^^^^^^^^^^^^^^^^^^^
 


https://bitbucket.org/yt_analysis/yt/commits/aa26e8fb20d5/
Changeset:   aa26e8fb20d5
Branch:      yt-3.0
User:        brittonsmith
Date:        2014-07-24 01:52:59
Summary:     Removing old halo interfaces from API docs.
Affected #:  1 file

diff -r d0a81e92ad2ba5c85c7bafa367314b9c92578a5a -r aa26e8fb20d5c8ed1aee8d9e43feca6845a9ebbb doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -408,35 +408,6 @@
    ~yt.analysis_modules.halo_finding.halo_objects.parallelHF
    ~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
 
-You can also operate on the Halo and HaloList objects themselves:
-
-.. autosummary::
-   :toctree: generated/
-
-   ~yt.analysis_modules.halo_finding.halo_objects.Halo
-   ~yt.analysis_modules.halo_finding.halo_objects.HaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.HOPHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.RockstarHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.FOFHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadedHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.TextHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.RockstarHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.HOPHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.FOFHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadedHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.TextHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHaloList
-
-There are also functions for loading halos from disk:
-
-.. autosummary::
-   :toctree: generated/
-
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadHaloes
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadTextHaloes
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadRockstarHalos
-
 You can use Halo catalogs generated externally as well:
 
 Two Point Functions


https://bitbucket.org/yt_analysis/yt/commits/3be418bfa459/
Changeset:   3be418bfa459
Branch:      yt-3.0
User:        brittonsmith
Date:        2014-07-24 02:00:27
Summary:     Removing one more line.
Affected #:  1 file

diff -r aa26e8fb20d5c8ed1aee8d9e43feca6845a9ebbb -r 3be418bfa459d9ded32d6ec5d3ade560c062db5c doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -408,8 +408,6 @@
    ~yt.analysis_modules.halo_finding.halo_objects.parallelHF
    ~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
 
-You can use Halo catalogs generated externally as well:
-
 Two Point Functions
 ^^^^^^^^^^^^^^^^^^^
 


https://bitbucket.org/yt_analysis/yt/commits/0a9b5f49e715/
Changeset:   0a9b5f49e715
Branch:      yt-3.0
User:        MatthewTurk
Date:        2014-07-24 13:50:20
Summary:     Merged in brittonsmith/yt/yt-3.0 (pull request #1066)

Removing most of the merger tree machinery.
Affected #:  8 files

diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/analyzing/analysis_modules/halo_analysis.rst
--- a/doc/source/analyzing/analysis_modules/halo_analysis.rst
+++ b/doc/source/analyzing/analysis_modules/halo_analysis.rst
@@ -11,4 +11,5 @@
    halo_transition
    halo_finding
    halo_mass_function
+   halo_merger_tree
    halo_analysis_example

diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/analyzing/analysis_modules/halo_merger_tree.rst
--- /dev/null
+++ b/doc/source/analyzing/analysis_modules/halo_merger_tree.rst
@@ -0,0 +1,6 @@
+.. _merger_tree:
+
+Halo Merger Tree
+================
+
+.. note:: As of :code:`yt-3.0`, the halo merger tree functionality has been removed to be replaced by machinery that works with the ``HaloCatalog`` object.  In the meantime, this functionality can still be found in :code:`yt-2.x`.

diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/analyzing/analysis_modules/merger_tree.rst
--- a/doc/source/analyzing/analysis_modules/merger_tree.rst
+++ /dev/null
@@ -1,767 +0,0 @@
-.. _merger_tree:
-
-Halo Merger Tree
-================
-
-.. note:: At the moment the merger tree is not yet implemented using new 
-    halo catalog functionality. 
-
-The Halo Merger Tree extension is capable of building a database of halo mergers
-over a set of time-ordered Enzo datasets. The fractional contribution of older
-'parent' halos to younger 'child' halos is calculated by comparing the unique
-index labels of their constituent particles. The data is stored in a
-`SQLite <http://sqlite.org/>`_ database which enables the use of powerful
-and fast SQL queries over all the halos.
-
-General Overview
-----------------
-
-The first requirement is a set of sequential datasets.
-The detail of the merger tree is increased as the difference in
-time between snapshots is reduced, at the cost of higher computational effort
-for the tree itself and disk usage for the snapshots.
-The merger tree relies on the output of one of the Halo Finders in yt, and the
-user can choose which one to use.
-The merger tree is capable of running the halo finder if it hasn't already
-been done.
-Once halo finding is accomplished for all the data snapshots, the halo
-lineage is calculated by comparing the particle membership of halos between
-pairs of time steps.
-The halo data and tree data is stored in the SQLite database.
-
-Clearly, another requirement is that Python has the
-`sqlite3 library <http://docs.python.org/library/sqlite3.html>`_
-installed.
-This should be built along with everything else yt needs
-if the ``install_script.sh`` was used.
-
-The merger tree can be calculated in parallel, and if necessary, it will run
-the halo finding in parallel as well. Please see the note below about the
-special considerations needed for Network File Systems.
-
-There is a convenience-wrapper for querying the database, called
-``MergerTreeConnect``.
-It simplifies accessing data in the database.
-
-There are two output classes for the merger tree. The ``MergerTreeDotOutput`` class
-outputs the tree for a user-specified subset of halos to a
-`Graphviz format <http://graphviz.org/>`_ file.
-Graphviz is an open-source package for visualizing connected objects in a
-graphical way.
-There are binary distributions for all major operating systems.
-It is also possible to dump the contents of the SQLite database to a simple text file
-with the ``MergerTreeTextOutput`` class.
-The data is saved in columnar format.
-
-Conceptual Primer
-~~~~~~~~~~~~~~~~~
-
-The best way to view the merger tree extension is as a two-part process.
-First, the merger tree is built and stored in the database.
-This process can be quite time consuming, depending on the size of the simulation,
-and the number and size of halos found in the snapshots.
-This is not a process one wants to do very often, which is why it is separate
-from the analysis parts.
-
-The second part is actually a many-part process, which is the analysis of the
-merger tree itself.
-The first step is computationally intensive, but the analysis step
-is user-intensive.
-The user needs to decide what to pull out of the merger tree
-and figure out how to extract the needed data with SQL statements.
-Once an analysis pipeline is written, it should run very fast for even
-very large databases.
-
-A Note About Network File Systems
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Accessing a SQLite database stored on a Network (or Distributed) File System (NFS)
-is a risky thing to do, particularly if more than one task wants to write
-at the same time (`see more here <http://www.sqlite.org/lockingv3.html#how_to_corrupt>`_).
-NFS disks can store files on multiple physical hard drives, and it can take time
-for changes made by one task to appear to all the parallel tasks.
-Only one task of the merger tree ever interacts with the database,
-so these dangers are minimal,
-but in general it's a good idea to know something about the disk used to
-store the database.
-
-In general, it is recommended to keep the database on a 'real disk' 
-(/tmp for example, if all the tasks are on the same SMP node,
-or RAM disk for extra speed) if possible,
-but it should work on a NFS disk as well.
-If a temporary disk is used to store the database while it's being built,
-remember to copy the file to a permanent disk after the merger tree script
-is finished.
-
-
-Running and Using the Halo Merger Tree
---------------------------------------
-
-It is very simple to create a merger tree database for a series of snapshots.
-The most difficult part is creating an ordered list of Enzo restart files.
-There are two ways to do it, by hand or with the EnzoSimulation extension.
-
-By Hand
-~~~~~~~
-
-Here is an example of how to build the list and build the database by hand.
-Here, the snapshots are stored in directories named DD????, and the enzo
-restart file named data????, where ???? is a four digit zero-padded integer.
-The final snapshot considered (most progressed in time) is DD0116,
-and the earliest that will be examined is DD0100.
-The database will be saved to ``/path/to/database/halos.db``.
-This example below works identically in serial or in parallel.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  from yt.analysis_modules.halo_finding.api import *
-
-  files = []
-  start = 100
-  finish = 116
-  for i in range(start, finish + 1):
-      files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
-  MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-If the halos have not been found previously for the snapshots, the halo finder
-will be run automatically. See the note about this below.
-
-Using EnzoSimulation
-~~~~~~~~~~~~~~~~~~~~
-
-Here is how to build the input list of restart files using the EnzoSimulation
-extension.
-It is possible to set range and interval between snapshots.
-Please see the EnzoSimulation
-documentation (:ref:`analyzing-an-entire-simulation`) for details.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  from yt.analysis_modules.halo_finding.api import *
-  import yt.analysis_modules.simulation_handler.api as ES
-  
-  es = ES.EnzoSimulation('/path/to/snapshots/simulation.par')
-  
-  files = []
-  for output in es.allOutputs:
-      files.append(output['filename'])
-
-  MergerTree(restart_files=files, database='/path/to/database/halos.db')
-
-Merger Tree Parallelism
------------------------
-
-If the halos are to be found during the course of building the merger tree,
-run with an appropriate number of tasks to the size of the dataset and the
-halo finder used.
-The speed of the merger tree itself,
-which compares halo membership in parallel very effectively,
-is almost completely constrained by the read/write times of the SQLite file.
-In tests with the halos pre-located, there is not much speedup beyond two MPI tasks.
-There is no negative effect with running the merger tree with more tasks (which is
-why if halos are to be found by the merger tree, the merger tree should be
-run with as many tasks as that step requires), and indeed if the simulation
-is a large one, running in parallel does provide memory parallelism,
-which is important.
-
-How The Database Is Handled In Analysis Restarts
-------------------------------------------------
-
-The Merger Tree is designed to allow the merger tree database to be built
-incrementally.
-For example, if a simulation is currently being run, the merger
-tree database can be built for the available datasets, and when new ones are
-created, the database extended to include them.
-So if there are going to be
-60 data snapshots total (indexed (0, 1, 2, ..., 59)), and only 50 are saved when the
-tree is first built, the analysis should be done on datasets [0, 49].
-If the last ten become available, re-run the merger tree on datasets [49, 59]
-referencing the same database as before.
-By referencing the same database as before, work does not need to be repeated.
-
-If the merger tree process is interrupted before completion (say, if the 
-job's walltime is exceeded and the scheduler kills it), just run the exact
-same job again.
-The merger tree will check to see what work has already been completed, and
-resume where it left off.
-
-Additional Parameters
-~~~~~~~~~~~~~~~~~~~~~
-
-When calling ``MergerTree``, there are several parameters that control how the
-halo finder is run, if it needs to be run.
-
-  * ``halo_finder_function`` (name) - Which of the halo finders (:ref:`halo_finding`)
-    to use. Default: ``HaloFinder`` (HOP).
-  * ``halo_finder_threshold`` (float) - When using HOP or Parallel HOP, this sets the
-    threshold used. Default: 80.0.
-  * ``FOF_link_length`` (float) - When using Friends of Friends (FOFHaloFinder), this sets
-    the inter-particle link length used. Default: 0.2.
-  * ``dm_only`` (bool) - Whether to include stars (False), or only the dark
-    matter particles when building halos (True).
-    Default: False.
-  * ``refresh`` (bool) - If set to True, this will run the halo finder and
-    rebuild the database regardless of whether or not the halo files or
-    database exist on disk already.
-    Default: False.
-  * ``index`` (bool) - Whether to add an index to the SQLite file. True makes
-    SQL searches faster at the cost of additional disk space. Default=True.
-
-Example using Parallel HOP:
-
-.. code-block:: python
-
-  MergerTree(restart_files=files, database='/path/to/database/halos.db',
-      halo_finder_function=parallelHF, halo_finder_threshold=100.)
-
-Pre-Computing Halos
-~~~~~~~~~~~~~~~~~~~
-
-If halo finding is to happen before the merger tree is calculated, and the
-work is not to be wasted, special care
-should be taken to ensure that all the data required for the merger tree is
-saved.
-By default, the merger tree looks for files that begin with the name ``MergerHalos``
-in the same directory as each Enzo restart file,
-and if those files are missing or renamed, halo finding will be performed again.
-If ``halos`` is the list of halos returned by the halo finder, these three
-commands should be called to save the needed data:
-
-.. code-block:: python
-
-  halos.write_out('MergerHalos.out')
-  halos.write_particle_lists('MergerHalos')
-  halos.write_particle_lists_txt('MergerHalos')
-
-There is a convenience function that will call the three functions above
-at one time:
-
-.. code-block:: python
-
-  halos.dump('MergerHalos')
-
-Please see the documents on halo finding for more information on what these
-commands do (:ref:`halo_finding`).
-
-Accessing Data in the Database
-------------------------------
-
-SQLite databases support nearly all of the standard SQL queries.
-It is possible to write very complicated and powerful SQL queries, but below
-only simple examples are shown. Please see other resources (WWW, books) for
-more on how to write SQL queries.
-
-It is possible to read and modify a SQLite database from the command line using
-the ``sqlite3`` command (e.g. ``sqlite3 database.db``). It can be very convenient
-to use this to quickly inspect a database, but is not suitable for extracting or inserting
-large amounts of data. There are many examples (again, see the WWW or books)
-available on how to use the command line ``sqlite3`` command.
-
-The table containing halo data in the database is named 'Halos'.
-All queries for halo data will come from this table.
-The table has these columns:
-
-  #. ``GlobalHaloID`` (int) - A fully-unique identifier for the halo.
-  #. ``SnapCurrentTimeIdentifier`` (int) - A unique time identifier for the snapshot
-     the halo comes from. Equivalent to 'CurrentTimeIdentifier' from the Enzo
-     restart file.
-  #. ``SnapZ`` (float) - The redshift for the halo.
-  #. ``SnapHaloID`` (int) - The halo ID for the halo taken from the output of the
-     halo finder (i.e. 'halos.write_out("HopAnalysis.out")'). It is unique for halos
-     in the same snapshot, but not unique across the full database.
-  #. ``HaloMass`` (float) - The total mass of dark matter in the halo as
-     identified by the halo finder.
-  #. ``NumPart`` (int) - Number of dark matter particles in the halo as identified
-     by the halo finder.
-  #. ``CenMassX``, 
-  #. ``CenMassY``,
-  #. ``CenMassZ`` (float) - The location of the center of mass of the halo in code units.
-  #. ``BulkVelX``,
-  #. ``BulkVelY``,
-  #. ``BulkVelZ`` (float) - The velocity of the center of mass of the halo in
-     cgs units.
-  #. ``MaxRad`` (float) - The distance from the center of mass to the most
-     remote particle in the halo in code units.
-  #. ``ChildHaloID0`` (int) - The GlobalHaloID of the child halo which receives
-     the greatest proportion of particles from this halo.
-  #. ``ChildHaloFrac0`` (float) - The fraction by mass of particles from this
-     (parent) halo that goes to the child halo recorded in ChildHaloID0.  If
-     all the particles from this parent halo go to ChildHaloID0, this number will
-     be 1.0, regardless of the mass of the child halo.
-  #. ``ChildHaloID[1-4]``, ``ChildHaloFrac[1-4]`` (int, float) - Similar to the
-     columns above, these store the second through fifth greatest recipients of
-     particle mass from this parent halo.
-
-.. warning::
-
-   A value of -1 in any of the ``ChildHaloID`` columns corresponds to
-   a fake (placeholder) child halo entry. There is no halo with an ID equal to -1.
-   This is used during the merger tree construction,
-   and must be accounted for when constructing SQL queries of the database.
-
-To get the data for the most massive halo at the end of the simulation,
-there is a convenience class that simplifies database access. Using it, a query
-might look like this:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-
-  mtc = MergerTreeConnect(database='halos.db')
-  line = "SELECT * FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
-  results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing the values for that halo in
-the same order as
-given above for the columns.
-
-Another way to get the same information is to use one of the convenience functions.
-The following example shows how to do this:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-
-  mtc = MergerTreeConnect(database='halos.db')
-  this_halo = mtc.get_GlobalHaloID(0, 0.0)
-
-The first term of ``get_GlobalHaloID`` is the ``SnapHaloID`` for the halo of
-interest, and the second is the redshift of interest.
-The results are stored in ``this_halo`` as an integer.
-
-If all that is wanted is a few of the columns, this slight modification below
-will retrieve only the desired data. In general, it is a good idea to retrieve
-only the columns that will actually be used. Requesting all the columns (with
-``*``) requires more reads from disk and slows down the query.
-
-.. code-block:: python
-
-  line = "SELECT NumPart, GlobalHaloID FROM Halos WHERE SnapZ=0.0 AND SnapHaloID=0;"
-  results = mtc.query(line)
-
-``results`` is a list containing a single tuple containing two items, the values for 
-``NumPart`` first and ``GlobalHaloID`` second.
-
-There is also a convenience function that will retrieve all the data columns
-for a given halo.
-The input of the function is the ``GlobalHaloID`` for the
-halo of interest, and it returns a dictionary where the keys are the names
-of the data columns, and the values are the entries in the database.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-
-  mtc = MergerTreeConnect(database='halos.db')
-  info = mtc.get_halo_info(1544)
-  print info
-  {'BulkVelX': -32759799.359999999,
-   'BulkVelY': -28740239.109999999,
-   'BulkVelZ': -20066000.690000001,
-   'CenMassX': 0.23059111360000001,
-   'CenMassY': 0.4061139809,
-   'CenMassZ': 0.80882763749999997,
-   'ChildHaloFrac0': 0.9642857141249418,
-   'ChildHaloFrac1': 0.0,
-   'ChildHaloFrac2': 0.0,
-   'ChildHaloFrac3': 0.0,
-   'ChildHaloFrac4': 0.0,
-   'ChildHaloID0': 1688,
-   'ChildHaloID1': 1712,
-   'ChildHaloID2': 1664,
-   'ChildHaloID3': 1657,
-   'ChildHaloID4': 1634,
-   'GlobalHaloID': 1544,
-   'HaloMass': 20934692770000.0,
-   'MaxRad': 0.01531299899,
-   'NumPart': 196,
-   'SnapCurrentTimeIdentifier': 1275946788,
-   'SnapHaloID': 56,
-   'SnapZ': 0.024169713061444002}
-
-If data from more than one halo is desired, more than one item will be returned.
-This query will find the largest halo from each of the snapshots.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0;"
-  results = mtc.query(line)
-
-``results`` is a list of multiple two-tuples.
-Note that SQLite doesn't return the values in any
-particular order. If order is unimportant, it saves time. But if order is
-important, you can modify the query to sort the results by redshift.
-
-.. code-block:: python
-
-  line = "SELECT HaloMass,SnapZ FROM Halos WHERE SnapHaloID=0 ORDER BY SnapZ DESC;"
-
-Now ``results`` will be ordered by time, first to last, for each two-tuple
-in the list.
-
-The function ``get_halo_parents()`` will return all the halos that are
-identified as parents of the specified halo.
-Due to the way that the halo tree is constructed, it will also return parent
-halos that have zero mass contribution to the specified halo.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  parents = mtc.get_halo_parents(1688)
-  print parents
-  [[1544, 0.9642857141249418],
-   [1613, 0.0],
-   [1614, 0.0],
-   [1489, 0.0],
-   [1512, 0.0],
-   [1519, 0.0],
-   [1609, 0.0]]
-
-The last example shows the kernel of the most important operation for a
-merger tree: recursion back in time to find progenitors for a halo. Using a 
-query similar to ones above, the ``GlobalHaloID`` is found for the halo of
-interest at some late point in time (z=0, typically). Using that value (given
-the random-ish value of 1234567),
-the halos that came before can be identified very easily:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-
-  lineage = {}
-  # Recursive function on parent halos.
-  def findParent(haloID, lineage):
-      line = "SELECT GlobalHaloID from Halos where ChildHaloID0=%d;" % haloID
-      results = mtc.query(line)
-      if results == []:
-          return lineage
-      # A one-tuple inside a list.
-      parentID = results[0][0]
-      lineage[parentID] = haloID
-      # Now we recurse back in time.
-      return findParent(parentID, lineage)
-
-  # Stores the parent->child relationships.
-  lineage = {}
-  # Call the function once with the late halo.
-  lineage = findParent(1234567, lineage)
-
-Contained within the dict ``lineage`` is the primary lineage for the final
-chosen halo. Storing the family tree in this way may not be the best choice,
-but this makes it clear how easy it is to build up the history of a halo
-over time.
-
-Merger Tree Convenience Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Below are some examples of the convenience functions available.
-
-**get_GlobalHaloID(SnapHaloID, z)**. Returns the GlobalHaloID for the
-given halo.::
-
-        
-    Parameters
-    ----------
-    SnapHaloID : Integer
-        The index label for the halo of interest, equivalent to
-        the first column of the halo finder text output file.
-    z : Float
-        The redshift for the halo of interest. The value returned will be
-        for the halo with SnapHaloID equal to ID (above) with redshift
-        closest to this value.
-    
-    Examples
-    --------
-    >>> this_halo = mtc.get_GlobalHaloID(0, 0.)
-
-**get_halo_parents(GlobalHaloID)**. Returns a list of the parent halos to the
-given halo, along with the contribution fractions from parent to child.
-This function returns a list of lists, where each entry in the top list
-is [GlobalHaloID, ChildHaloFrac] of the parent halo in relationship
-to the given child halo.::
-        
-    Parameters
-    ----------
-    GlobalHaloID : Integer
-        The GlobalHaloID of the halo of interest.
-    
-    Examples
-    --------
-    >>> parents = mtc.get_halo_parents(1688)
-    >>> print parents
-    [[1544, 0.9642857141249418],
-     [1613, 0.0],
-     [1614, 0.0],
-     [1489, 0.0],
-     [1512, 0.0],
-     [1519, 0.0],
-     [1609, 0.0]]
-
-**get_direct_parent(GlobalHaloID)**. Returns the GlobalHaloID of the direct
-parent of the given halo.
-This is accomplished by identifying the most massive parent halo
-that contributes at least 50% of its mass to the given halo.::
-        
-    Parameters
-    ----------
-    GlobalHaloID : Integer
-        The GlobalHaloID of the halo of interest.
-    
-    Examples
-    --------
-    >>> parent = mtc.get_direct_parent(1688)
-    >>> print parent
-    1544
-
-**get_halo_info(GlobalHaloID)**. Returns all available information for
-the given GlobalHaloID in the form of a dict.::
-        
-    Parameters
-    ----------
-    GlobalHaloID : Integer
-        The unique index for the halo of interest.
-    
-    Examples
-    --------
-    >>> info = mtc.get_halo_info(1544)
-    >>> print info
-    {'BulkVelX': -32759799.359999999,
-     'BulkVelY': -28740239.109999999,
-     'BulkVelZ': -20066000.690000001,
-     'CenMassX': 0.23059111360000001,
-     'CenMassY': 0.4061139809,
-     'CenMassZ': 0.80882763749999997,
-     'ChildHaloFrac0': 0.9642857141249418,
-     'ChildHaloFrac1': 0.0,
-     'ChildHaloFrac2': 0.0,
-     'ChildHaloFrac3': 0.0,
-     'ChildHaloFrac4': 0.0,
-     'ChildHaloID0': 1688,
-     'ChildHaloID1': 1712,
-     'ChildHaloID2': 1664,
-     'ChildHaloID3': 1657,
-     'ChildHaloID4': 1634,
-     'GlobalHaloID': 1544,
-     'HaloMass': 20934692770000.0,
-     'MaxRad': 0.01531299899,
-     'NumPart': 196,
-     'SnapCurrentTimeIdentifier': 1275946788,
-     'SnapHaloID': 56,
-     'SnapZ': 0.024169713061444002}
-
-
-Merger Tree Output
-------------------
-
-There are two included methods for outputting the contents of a Merger Tree
-database: Graphviz and plain-text columnar format.
-
-Graphviz Output
-~~~~~~~~~~~~~~~
-
-The `Graphviz <http://graphviz.org/>`_ output function can write the merger
-tree to a text file, which can then be parsed by the GraphViz executable
-``dot`` into an image, or an image can be created directly.
-The GraphViz engine used to parse the
-output is the ``dot`` engine, which produces hierarchical diagrams where
-directionality (such as left to right or top to bottom)
-indicates some meaningful property.
-In the case of the merger tree, top to bottom indicates the progress of
-time.
-Graphviz can output the visualization into a wide range of image and vector
-formats suitable for any application.
-
-Below is a simple example of the Graphviz/dot visualization.
-Each box contains the mass of the halo (in Msun), and the center of mass
-for the halo in simulation units.
-For each snapshot, the box for the largest halo is colored red.
-The numbers next to the link arrows give the percentage of the parent
-halo's mass that goes to the child.
-On each row, the un-linked black boxes
-contain the redshift for that snapshot.
-
-.. image:: _images/merger_tree_ex.png
-   :width: 400
-   :height: 438
-
-To output the merger tree for a set of halos, the chosen halos need to be
-identified. There are two choices, either the ``GlobalHaloID`` or
-the ``SnapHaloID`` along with the ``SnapCurrentTimeIdentifier`` value
-for the chosen halo(s) may be used.
-Two bits of information need to be used if ``GlobalHaloID`` is not specified
-because ``SnapHaloID`` is not a unique identifier in the database.
-The reason why ``SnapCurrentTimeIdentifier`` is used rather than ``SnapZ`` has
-to do with the float valuation of the redshift column and the way SQL queries
-work.
-If ``SnapZ`` were used, the precise float value of the desired redshift would
-have to be used, rather than the simpler-to-get-correct integer value of
-``SnapCurrentTimeIdentifier``.
-
-Luckily it isn't as hard as it sounds to get the ``GlobalHaloID`` for the
-desired halo(s).
-By using the ``MergerTreeConnect`` class, it is simple to pick out halos
-before creating the Graphviz output.
-Below, the ``GlobalHaloID`` for the most massive halo in the last (z~0, typically)
-snapshot is found:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  
-  line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
-  results = mtc.query(line)
-  print results
-
-Because of the way the database is created, from early times to late, the most
-massive halo at z~0 will have the largest ``GlobalHaloID`` for all halos with
-``SnapHaloID=0``. ``results`` will contain a one-tuple in a list of the
-desired ``GlobalHaloID``.
-
-Alternatively, one of the convenience functions can be used which may be easier:
-
-.. code-block:: python
-
-  from yt.mods import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  
-  thisHalo = mtc.get_GlobalHaloID(0, 0.0)
-
-``thisHalo`` will be an integer giving the GlobalHaloID for the most massive
-halo (ID=0) at z=0.0.
-
-To output the merger tree for the five largest halos in the last snapshot,
-it may be simplest to find the ``SnapCurrentTimeIdentifier`` for that
-snapshot.
-This can either be done by referencing the dataset itself by hand
-(look for ``CurrentTimeIdentifier`` in the Enzo restart file), or by querying
-the database.
-Here is how to query the database for the right information:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  mtc = MergerTreeConnect(database='halos.db')
-  
-  line = "SELECT max(GlobalHaloID) FROM Halos WHERE SnapHaloID=0;"
-  results = mtc.query(line)
-  
-  line = "SELECT SnapCurrentTimeIdentifier FROM Halos WHERE GlobalHaloID=%d;" % results[0][0]
-  results = mtc.query(line)
-  print results
-
-``results`` contains a one-tuple in a list of the desired
-``SnapCurrentTimeIdentifier``.
-Supposing that the desired ``SnapCurrentTimeIdentifier`` is 72084721, outputting
-merger trees is now simple:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeDotOutput(halos=[0,1,2,3,4], database='halos.db',
-      dotfile='MergerTree.gv', current_time=72084721)
-
-This will output the file ``MergerTree.gv`` which can be parsed by Graphviz.
-To output to an image format, name the file appropriately (``MergerTree.png``).
-A list of available GraphViz image formats can be found by invoking
-(from the command line) ``dot -v``.
-
-If the ``GlobalHaloID`` values are known for all of the desired halos,
-``current_time`` should not be specified, as below:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
-      dotfile='MergerTree.gv', link_min=0.7)
-
-The ``link_min`` parameter above limits the tree to following links between
-parent and child halos for which at least 70% of the parent halo's mass goes
-to the child. The default is 0.2.
-
-In this slightly modified example below, if ``dot`` is installed in the
-``PATH``, an image file will be created without an intermediate text file:
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeDotOutput(halos=[24212,5822,19822,10423,51324], database='halos.db',
-      dotfile='MergerTree.png', link_min=0.7)
-
-
-Plain-Text Output
-~~~~~~~~~~~~~~~~~
-
-This is how to output the entire contents of the database to a text file:
-
-.. code-block:: python
-
-  from yt.analysis_modules.halo_merger_tree.api import *
-  
-  MergerTreeTextOutput(database='halos.db', outfile='MergerTreeDB.txt')
-
-Putting it All Together
------------------------
-
-Here is an example of how to create a merger tree for the most massive halo
-in the final snapshot from start to finish, and output the Graphviz
-visualization as a PDF file.
-This will work in serial and in parallel.
-
-.. code-block:: python
-
-  from yt.mods import *
-  from yt.analysis_modules.halo_merger_tree.api import *
-  from yt.analysis_modules.halo_finding.api import *
-
-  # Pick our snapshots to use.
-  files = []
-  start = 100
-  finish = 116
-  for i in range(start, finish + 1):
-      files.append('/path/to/snapshots/DD%04d/data%04d' % (i, i))
-
-  my_database = '/path/to/database/halos.db'
-
-  # Build the tree.
-  MergerTree(restart_files=files, database=my_database)
-  
-  # Get the GlobalHaloID for the halo.
-  mtc = MergerTreeConnect(database=my_database)
-  my_halo = mtc.get_GlobalHaloID(0, 0.0)
-  
-  # Output the tree as a PDF file.
-  MergerTreeDotOutput(halos=my_halo, database=my_database, link_min=0.5,
-      dotfile='MergerTree.pdf')
-
-
-  

diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -408,44 +408,6 @@
    ~yt.analysis_modules.halo_finding.halo_objects.parallelHF
    ~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
 
-You can also operate on the Halo and HaloList objects themselves:
-
-.. autosummary::
-   :toctree: generated/
-
-   ~yt.analysis_modules.halo_finding.halo_objects.Halo
-   ~yt.analysis_modules.halo_finding.halo_objects.HaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.HOPHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.RockstarHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.FOFHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadedHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.TextHalo
-   ~yt.analysis_modules.halo_finding.halo_objects.RockstarHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.HOPHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.FOFHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadedHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.TextHaloList
-   ~yt.analysis_modules.halo_finding.halo_objects.parallelHOPHaloList
-
-There are also functions for loading halos from disk:
-
-.. autosummary::
-   :toctree: generated/
-
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadHaloes
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadTextHaloes
-   ~yt.analysis_modules.halo_finding.halo_objects.LoadRockstarHalos
-
-You can use Halo catalogs generated externally as well:
-
-.. autosummary::
-   :toctree: generated/
-
-   ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.HaloCatalog
-   ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.EnzoFOFMergerTree
-   ~yt.analysis_modules.halo_merger_tree.enzofof_merger_tree.plot_halo_evolution
-
 Two Point Functions
 ^^^^^^^^^^^^^^^^^^^
 

diff -r 1ba67463a5253d18d1767f1f06e96180078aca62 -r 0a9b5f49e71502235e12f934476e918804dc8ded yt/analysis_modules/halo_merger_tree/api.py
--- a/yt/analysis_modules/halo_merger_tree/api.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-API for halo_merger_tree
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-from .merger_tree import \
-    DatabaseFunctions, \
-    MergerTree, \
-    MergerTreeConnect, \
-    Node, \
-    Link, \
-    MergerTreeDotOutput, \
-    MergerTreeTextOutput
-
-from .enzofof_merger_tree import \
-    HaloCatalog, \
-    find_halo_relationships, \
-    EnzoFOFMergerTree, \
-    plot_halo_evolution

This diff is so big that we needed to truncate the remainder.

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.



More information about the yt-svn mailing list