[yt-svn] commit/yt: ngoldbaum: Merged in brittonsmith/yt/yt-3.0 (pull request #1081)

commits-noreply at bitbucket.org
Tue Jul 29 13:43:00 PDT 2014


1 new commit in yt:

https://bitbucket.org/yt_analysis/yt/commits/9cfc115c3d00/
Changeset:   9cfc115c3d00
Branch:      yt-3.0
User:        ngoldbaum
Date:        2014-07-29 22:42:49
Summary:     Merged in brittonsmith/yt/yt-3.0 (pull request #1081)

Halo cleanup.
Affected #:  11 files

diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -279,6 +279,15 @@
 .. autosummary::
    :toctree: generated/
 
+   ~yt.frontends.halo_catalogs.halo_catalog.data_structures.HaloCatalogHDF5File
+   ~yt.frontends.halo_catalogs.halo_catalog.data_structures.HaloCatalogDataset
+   ~yt.frontends.halo_catalogs.halo_catalog.fields.HaloCatalogFieldInfo
+   ~yt.frontends.halo_catalogs.halo_catalog.io.IOHandlerHaloCatalogHDF5
+   ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindParticleIndex
+   ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindHDF5File
+   ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindDataset
+   ~yt.frontends.halo_catalogs.owls_subfind.fields.OWLSSubfindFieldInfo
+   ~yt.frontends.halo_catalogs.owls_subfind.io.IOHandlerOWLSSubfindHDF5
    ~yt.frontends.halo_catalogs.rockstar.data_structures.RockstarBinaryFile
    ~yt.frontends.halo_catalogs.rockstar.data_structures.RockstarDataset
    ~yt.frontends.halo_catalogs.rockstar.fields.RockstarFieldInfo
@@ -394,20 +403,45 @@
    ~yt.data_objects.profiles.Profile3D
    ~yt.data_objects.profiles.create_profile
 
-Halo Finding and Particle Functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Halo Analysis
+^^^^^^^^^^^^^
 
-Halo finding can be executed using these types.  Here we list the main halo
-finders as well as a few other supplemental objects.
+The ``HaloCatalog`` object is the primary means for performing custom analysis 
+on cosmological halos.  It is also the primary interface for halo finding.
 
-.. rubric:: Halo Finders
+.. autosummary::
+   :toctree: generated/
+
+   ~yt.analysis_modules.halo_analysis.halo_catalog.HaloCatalog
+   ~yt.analysis_modules.halo_analysis.halo_finding_methods.HaloFindingMethod
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.HaloCallback
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.halo_sphere
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.sphere_field_max_recenter
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.sphere_bulk_velocity
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.profile
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.save_profiles
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.load_profiles
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.virial_quantities
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.phase_plot
+   ~yt.analysis_modules.halo_analysis.halo_callbacks.delete_attribute
+   ~yt.analysis_modules.halo_analysis.halo_filters.HaloFilter
+   ~yt.analysis_modules.halo_analysis.halo_filters.quantity_value
+   ~yt.analysis_modules.halo_analysis.halo_filters.not_subhalo
+   ~yt.analysis_modules.halo_analysis.halo_quantities.HaloQuantity
+   ~yt.analysis_modules.halo_analysis.halo_quantities.center_of_mass
+   ~yt.analysis_modules.halo_analysis.halo_quantities.bulk_velocity
+
+Halo Finding
+^^^^^^^^^^^^
+
+These classes provide direct access to the halo finders.  However, using the
+``HaloCatalog`` interface described above is strongly recommended.
 
 .. autosummary::
    :toctree: generated/
 
    ~yt.analysis_modules.halo_finding.halo_objects.FOFHaloFinder
    ~yt.analysis_modules.halo_finding.halo_objects.HOPHaloFinder
-   ~yt.analysis_modules.halo_finding.halo_objects.parallelHF
    ~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
 
 Two Point Functions

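As a quick illustration of the ``HaloCatalog`` interface recommended in the
api.rst changes above, a minimal sketch might look like the following; the
dataset path and the mass cut are placeholders, and the filter/quantity names
are the ones registered in the halo_analysis module:

import yt
from yt.analysis_modules.halo_analysis.api import HaloCatalog

# Load the simulation output to run halo finding on (placeholder path).
data_ds = yt.load("Enzo_64/RD0006/RedshiftOutput0006")

# Find halos with HOP through the HaloCatalog interface, keep only halos
# above a mass cut, and attach a center-of-mass quantity to each halo.
hc = HaloCatalog(data_ds=data_ds, finder_method="hop")
hc.add_filter("quantity_value", "particle_mass", ">", 1e13, "Msun")
hc.add_quantity("center_of_mass")
hc.create()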
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
--- /dev/null
+++ b/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
@@ -0,0 +1,801 @@
+"""
+A very simple, purely serial merger tree script that parses FOF catalogs,
+either output by Enzo or by yt's FOF halo finder, and then compares
+parent/child relationships.
+
+
+
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (c) 2013, yt Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+# plot_halo_evolution() gives a good full example of how to use the framework
+
+# First pass at a simplified merger tree
+#
+# Basic outline:
+#
+# 1. Halo find inline, obtaining particle catalogs
+# 2. Load dataset at time t
+# 3. Load dataset at time t+1
+# 4. Parse catalogs for t and t+1
+# 5. Place halos for t+1 in kD-tree
+# 6. For every halo in t, execute ball-query with some linking length
+# 7. For every halo in ball-query result, execute numpy's intersect1d on
+#    particle IDs
+# 8. Parentage is described by a fraction of particles that pass from one to
+#    the other; we have both descendant fractions and ancestry fractions.
+
+
+import numpy as np
+import h5py
+import os
+import time
+import pdb
+import cPickle
+import glob
+
+# mylog and get_pbar used below are provided by the wildcard import.
+from yt.funcs import *
+from yt.extern.pykdtree import KDTree
+import yt.extern.pydot as pydot
+
+# We don't currently use this, but we may again find a use for it in the
+# future.
+class MaxLengthDict(dict):
+    def __init__(self, *args, **kwargs):
+        dict.__init__(self, *args, **kwargs)
+        self.order = [None] * 50
+
+    def __setitem__(self, key, val):
+        if key not in self.order:
+            to_remove = self.order.pop(0)
+            self.pop(to_remove, None)
+        self.order.append(key)
+        dict.__setitem__(self, key, val)
+
+    def __getitem__(self, key):
+        if key in self.order:
+            self.order.pop(self.order.index(key))
+            self.order.append(key)
+        return dict.__getitem__(self, key)
+
+    def __delitem__(self, key):
+        dict.__delitem__(self, key)
+        self.order.pop(self.order.index(key))
+        self.order.insert(0, None)
+
+class HaloCatalog(object):
+    r"""A catalog of halos, parsed from EnzoFOF outputs.
+
+    This class will read in catalogs output by the Enzo FOF halo finder and
+    make available their positions, radii, etc.  Enzo FOF has been available
+    since Enzo 2.0, and can be run either inline (with the correct options)
+    or as a postprocessing step using the `-F` command line option.  This
+    class is mostly useful when calculating a merger tree, and when the
+    particle IDs for members of a given halo are also output.
+
+    Parameters
+    ----------
+    output_id : int
+        This is the integer output id of the halo catalog to parse and
+        load.
+    cache : bool
+        Should the particle IDs be stored between accesses?  If True, the
+        corresponding particle files must exist.
+    external_FOF : bool, optional
+        Are we building a tree from outputs generated by an
+        external FOF program, or an FOF internal to yt?
+    FOF_directory : str, optional
+        Directory where FOF files are located
+    """
+    cache = None
+    def __init__(self, output_id, cache = True, external_FOF=True, FOF_directory="FOF"):
+        self.output_id = output_id
+        self.external_FOF = external_FOF
+        self.redshift = 0.0
+        self.FOF_directory = FOF_directory
+        self.particle_file = h5py.File("%s/particles_%05i.h5" % \
+                                      (FOF_directory, output_id), "r")
+        if self.external_FOF:
+            self.parse_halo_catalog_external()
+        else:
+            self.parse_halo_catalog_internal()
+        if cache: self.cache = dict()  # could also be a MaxLengthDict()
+
+    def __del__(self):
+        self.particle_file.close()
+
+    def parse_halo_catalog_external(self):
+        hp = []
+        for line in open("%s/groups_%05i.dat" % \
+                        (self.FOF_directory, self.output_id)):
+            if line.strip() == "": continue # empty
+            if line.startswith("# Red"):
+                self.redshift = float(line.split("=")[1])
+            if line[0] == "#": continue # comment
+            if line[0] == "d": continue # datavar
+            x,y,z = [float(f) for f in line.split(None, 3)[:-1]]
+            hp.append([x,y,z])
+        if hp != []:
+            self.halo_positions = np.array(hp)
+            self.halo_kdtree = KDTree(self.halo_positions)
+        else:
+            self.halo_positions = None
+            self.halo_kdtree = None
+        return hp
+
+    def parse_halo_catalog_internal(self):
+        """
+        This parser works on the files output directly by yt's internal
+        halo finder, whereas parse_halo_catalog_external works with output
+        from an external FOF implementation.
+
+        Examples
+        --------
+        >>> ds = load("DD0000/DD0000")
+        >>> halo_list = FOFHaloFinder(ds)
+        >>> halo_list.write_out("FOF/groups_00000.txt")
+        >>> halos_COM = parse_halo_catalog_internal()
+        """
+        hp = []
+        for line in open("%s/groups_%05i.txt" % \
+                        (self.FOF_directory, self.output_id)):
+            if line.startswith("# RED"):
+                self.redshift = float(line.split("=")[1])
+                continue
+            if line.strip() == "": continue # empty
+            if line[0] == "#": continue # comment
+            x,y,z = [float(f) for f in line.split()[7:10]] # COM x,y,z
+            hp.append([x,y,z])
+        if hp != []:
+            self.halo_positions = np.array(hp)
+            self.halo_kdtree = KDTree(self.halo_positions)
+        else:
+            self.halo_positions = None
+            self.halo_kdtree = None
+        return hp
+
+    def read_particle_ids(self, halo_id):        
+        if self.cache is not None:
+            if halo_id not in self.cache:
+                if self.external_FOF:
+                    self.cache[halo_id] = \
+                    self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
+                else:
+                    self.cache[halo_id] = \
+                    self.particle_file["/Halo%08i/particle_index" % halo_id][:]
+            ids = self.cache[halo_id]
+        else:
+            if self.external_FOF:
+                ids = self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
+            else:
+                ids = self.particle_file["/Halo%08i/particle_index" % halo_id][:]
+        return HaloParticleList(halo_id, self.halo_positions[halo_id,:], ids)
+
+    def calculate_parentage_fractions(self, other_catalog, radius = 0.10):
+        parentage_fractions = {}
+        if self.halo_positions is None or other_catalog.halo_positions is None:
+            return parentage_fractions
+        mylog.debug("Ball-tree query with radius %0.3e", radius)
+        all_nearest = self.halo_kdtree.query_ball_tree(
+            other_catalog.halo_kdtree, radius)
+        pbar = get_pbar("Halo Mergers", self.halo_positions.shape[0])
+        for hid1, nearest in enumerate(all_nearest):
+            pbar.update(hid1)
+            parentage_fractions[hid1] = {}
+            HPL1 = self.read_particle_ids(hid1)
+            for hid2 in sorted(nearest):
+                HPL2 = other_catalog.read_particle_ids(hid2)
+                p1, p2 = HPL1.find_relative_parentage(HPL2)
+                parentage_fractions[hid1][hid2] = (p1, p2, HPL2.number_of_particles)
+            parentage_fractions[hid1]["NumberOfParticles"] = HPL1.number_of_particles
+        pbar.finish()
+        return parentage_fractions
+
+class HaloParticleList(object):
+    def __init__(self, halo_id, position, particle_ids):
+        self.halo_id = halo_id
+        self.position = np.array(position)
+        self.particle_ids = particle_ids
+        self.number_of_particles = particle_ids.size
+
+    def find_nearest(self, other_tree, radius = 0.10):
+        return other_tree.query_ball_point(self.position, radius)
+
+    def find_relative_parentage(self, child):
+        # Return two values: percent this halo gave to the other, and percent
+        # of the other that comes from this halo
+        overlap = np.intersect1d(self.particle_ids, child.particle_ids).size
+        of_child_from_me = float(overlap)/child.particle_ids.size
+        of_mine_from_me = float(overlap)/self.particle_ids.size
+        return of_child_from_me, of_mine_from_me
+
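+# A minimal worked example of find_relative_parentage above, written as a
+# hypothetical helper for illustration only (the toy ID arrays stand in for
+# the particle ID lists read from the HDF5 particle files):
+def _example_relative_parentage():
+    parent = HaloParticleList(0, [0.5, 0.5, 0.5], np.arange(100))
+    child = HaloParticleList(1, [0.52, 0.5, 0.5], np.arange(60, 140))
+    # 40 shared IDs: 40/80 = 0.5 of the child, 40/100 = 0.4 of the parent.
+    of_child_from_me, of_mine_from_me = parent.find_relative_parentage(child)
+    return of_child_from_me, of_mine_from_me
+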
+class EnzoFOFMergerBranch(object):
+    def __init__(self, tree, output_num, halo_id, max_children,
+                 min_relation=0.25):
+        self.output_num = output_num
+        self.halo_id = halo_id
+        self.npart = tree.relationships[output_num][halo_id]["NumberOfParticles"]
+        self.children = []
+        self.progenitor = -1
+        max_relationship = 0.0
+        halo_count = 0
+        sorted_keys = sorted(tree.relationships[output_num][halo_id].keys())
+        for k in sorted_keys:
+            if not str(k).isdigit(): continue
+            v = tree.relationships[output_num][halo_id][k]
+            if v[1] > min_relation and halo_count < max_children:
+                halo_count += 1
+                self.children.append((k,v[1],v[2]))
+                if v[1] > max_relationship:
+                    self.progenitor = k
+                    max_relationship = v[1]
+
+class EnzoFOFMergerTree(object):
+    r"""Calculates the parentage relationships for halos for a series of
+    outputs, using the framework provided in enzofof_merger_tree.
+
+    Parameters
+    ----------
+    zrange : tuple
+        This is the redshift range (min, max) to calculate the
+        merger tree. E.g. (0, 2) for z=2 to z=0
+    cycle_range : tuple, optional
+        This is the cycle number range (min, max) to calculate the
+        merger tree.  If both zrange and cycle_range are given,
+        zrange takes precedence when scanning for outputs.
+    output : bool, optional
+        If True, both .cpkl and .txt files containing the parentage
+        relationships will be output.
+    load_saved : bool, optional
+        Flag to load previously saved parental relationships
+    save_filename : str, optional
+        Filename to save parental relationships
+    external_FOF : bool, optional
+        Are we building a tree from outputs generated by an
+        external FOF program, or an FOF internal to yt?
+    FOF_directory : str, optional
+        Directory where the FOF files are located.  Note that the files must
+        be named groups_DDDDD.txt for internal yt outputs and
+        groups_DDDDD.dat for external FOF outputs, where DDDDD is the
+        five-digit cycle number (e.g. groups_00000.txt).
+    
+    Examples
+    --------
+    >>> mt = EnzoFOFMergerTree()    # by default it grabs every DD in FOF dir
+    >>> mt.build_tree(0)  # Create tree for halo 0
+    >>> mt.print_tree()
+    >>> mt.write_dot()
+
+    See Also
+    --------
+    plot_halo_evolution()
+    """    
+    def __init__(self, zrange=None, cycle_range=None, output=False,
+                 load_saved=False, save_filename="merger_tree.cpkl",
+                 external_FOF=True, FOF_directory="FOF"):
+
+        self.relationships = {}
+        self.redshifts = {}
+        self.external_FOF = external_FOF
+        self.FOF_directory = FOF_directory
+        if load_saved:
+            self.load_tree("%s/%s" % (self.FOF_directory, save_filename))
+            # make merger tree work within specified cycle/z limits
+            # on preloaded halos
+            if zrange is not None:
+                self.select_redshifts(zrange)
+            if cycle_range is not None:
+                self.select_cycles(cycle_range)
+        else:
+            self.find_outputs(zrange, cycle_range, output)
+            self.run_merger_tree(output)
+            self.save_tree("%s/%s" % (self.FOF_directory, save_filename))
+        
+    def select_cycles(self, cycle_range):
+        """
+        Takes an existing tree and pares it to only include a subset of
+        cycles.  Useful in paring a loaded tree. 
+        """
+        # N.B. Does not delete info from self.relationships to save space
+        # just removes it from redshift dict for indexing
+        for cycle in self.redshifts.keys():
+            if cycle < cycle_range[0] or cycle > cycle_range[1]:
+                del self.redshifts[cycle]
+
+    def select_redshifts(self, zrange):
+        """
+        Takes an existing tree and pares it to only include a subset of
+        redshifts.  Useful in paring a loaded tree. 
+        """
+        # N.B. Does not delete info from self.relationships to save space
+        # just removes it from redshift dict for indexing
+        for redshift in self.redshifts.values():
+            if redshift < zrange[0] or redshift > zrange[1]:
+                # some reverse lookup magic--assumes unique cycle/z pairs
+                cycle = [key for key, value in self.redshifts.items() \
+                         if value == redshift][0]
+                del self.redshifts[cycle]
+
+    def save_tree(self, filename):
+        cPickle.dump((self.redshifts, self.relationships),
+                     open(filename, "wb"))
+
+    def load_tree(self, filename):
+        self.redshifts, self.relationships = \
+                        cPickle.load(open(filename, "rb"))
+
+    def clear_data(self):
+        r"""Deletes previous merger tree, but keeps parentage
+        relationships.
+        """
+        del self.levels
+
+    def find_outputs(self, zrange, cycle_range, output):
+        self.numbers = []
+        if self.external_FOF:
+            filenames = "%s/groups_*.dat" % (self.FOF_directory)
+            files = glob.glob(filenames)
+        else:
+            filenames = "%s/groups_*.txt" % (self.FOF_directory)
+            files = glob.glob(filenames)
+        # If using redshift range, load redshifts only
+        for f in files:
+            num = int(f[-9:-4])
+            if zrange is not None:
+                HC = HaloCatalog(num, external_FOF=self.external_FOF, \
+                                FOF_directory=self.FOF_directory)
+                # Allow for some epsilon
+                diff1 = (HC.redshift - zrange[0]) / zrange[0]
+                diff2 = (HC.redshift - zrange[1]) / zrange[1]
+                if diff1 >= -1e-3 and diff2 <= 1e-3:
+                    self.numbers.append(num)
+                del HC
+            elif cycle_range is not None:
+                if num >= cycle_range[0] and num <= cycle_range[1]:
+                    self.numbers.append(num)
+            else:
+                self.numbers.append(num)
+        self.numbers.sort()
+
+    def run_merger_tree(self, output):
+        # Run merger tree for all outputs, starting with the last output
+        for i in range(len(self.numbers)-1, 0, -1):
+            if output:
+                output = "%s/tree-%5.5d-%5.5d" % \
+                         (self.FOF_directory, self.numbers[i], self.numbers[i-1])
+            else:
+                output = None
+            z0, z1, fr = find_halo_relationships(self.numbers[i], \
+                                                 self.numbers[i-1], \
+                                                 output_basename=output, \
+                                                 external_FOF=self.external_FOF,
+                                                 FOF_directory=self.FOF_directory)
+            self.relationships[self.numbers[i]] = fr
+            self.redshifts[self.numbers[i]] = z0
+        # Fill in last redshift
+        self.redshifts[self.numbers[0]] = z1
+
+    def build_tree(self, halonum, min_particles=0, max_children=1e20):
+        r"""Builds a merger tree, starting at the last output.
+
+        Parameters
+        ----------
+        halonum : int
+            Halo number in the last output to analyze.
+        min_particles : int, optional
+            Minimum number of particles of halos in tree.
+        max_children : int, optional
+            Maximum number of child halos each leaf can have.
+        """
+        self.halonum = halonum
+        self.max_children = max_children
+        self.output_numbers = sorted(self.relationships, reverse=True)
+        self.levels = {}
+        trunk = self.output_numbers[0]
+        self.levels[trunk] = [EnzoFOFMergerBranch(self, trunk, halonum,
+                                                  max_children)]
+        self.generate_tree(min_particles, max_children)
+
+    def filter_small_halos(self, lvl, min_particles):
+        # Filter out children with less than min_particles
+        for h in self.levels[lvl]:
+            fil = []
+            for c in h.children:
+                if c[2] > min_particles:  # c[2] = npart
+                    fil.append(c)
+            h.children = fil
+
+    def generate_tree(self, min_particles, max_children):
+        self.filter_small_halos(self.output_numbers[0], min_particles)
+        for i in range(1,len(self.output_numbers)):
+            prev = self.output_numbers[i-1]
+            this = self.output_numbers[i]
+            self.levels[this] = []
+            this_halos = []  # To check for duplicates
+            for h in self.levels[prev]:
+                for c in h.children:
+                    if c[0] in this_halos: continue
+                    if self.relationships[this] == {}: continue
+                    branch = EnzoFOFMergerBranch(self, this, c[0],
+                                                 max_children)
+                    self.levels[this].append(branch)
+                    this_halos.append(c[0])
+            self.filter_small_halos(this, min_particles)
+
+    def get_massive_progenitors(self, halonum, min_relation=0.25):
+        r"""Returns a list of the most massive progenitor halos.
+
+        This routine walks down the tree, following the most massive
+        progenitor on each node.
+
+        Parameters
+        ----------
+        halonum : int
+            Halo number at the last output to trace.
+
+        Returns
+        -------
+        output : dict
+            Dictionary of redshifts, cycle numbers, and halo numbers
+            of the most massive progenitor.  keys = {redshift, cycle,
+            halonum}
+        """
+        output = {"redshift": [], "cycle": [], "halonum": []}
+        # First (lowest redshift) node in tree
+        halo0 = halonum
+        for cycle in sorted(self.numbers, reverse=True):
+            if cycle not in self.relationships: break
+            if halo0 not in self.relationships[cycle]: break
+            node = self.relationships[cycle][halo0]
+            output["redshift"].append(self.redshifts[cycle])
+            output["cycle"].append(cycle)
+            output["halonum"].append(halo0)
+            # Find progenitor
+            max_rel = 0.0
+            for k,v in node.items():
+                if not str(k).isdigit(): continue
+                if v[1] > max_rel and v[1] > min_relation:
+                    halo0 = k
+                    max_rel = v[1]
+        return output
+
+    def print_tree(self):
+        r"""Prints the merger tree to stdout.
+        """
+        for lvl in sorted(self.levels, reverse=True):
+            if lvl not in self.redshifts: continue
+            print "========== Cycle %5.5d (z=%f) ==========" % \
+                  (lvl, self.redshifts[lvl])
+            for br in self.levels[lvl]:
+                print "Parent halo = %d" % br.halo_id
+                print "--> Most massive progenitor == Halo %d" % \
+                      (br.progenitor)
+                for i,c in enumerate(br.children):
+                    if i > self.max_children: break
+                    print "-->    Halo %8.8d :: fraction = %g" % (c[0], c[1])
+
+    def save_halo_evolution(self, filename):
+        """
+        Saves as an HDF5 file the relevant details about a halo
+        over the course of its evolution following the most massive
+        progenitor to have given it the bulk of its particles.
+        It stores info from the FOF_groups file: location, mass, id, etc.
+        """
+        f = h5py.File("%s/%s" % (self.FOF_directory, filename), 'a')
+        cycle_fin = sorted(self.redshifts.keys())[-1]
+        halo_id = self.levels[cycle_fin][0].halo_id
+        halo = "halo%05d" % halo_id
+        if halo in f:
+            del f["halo%05d" % halo_id]
+        g = f.create_group("halo%05d" % halo_id)
+        size = len(self.redshifts)
+        cycle = np.zeros(size)
+        redshift = np.zeros(size)
+        halo_id = np.zeros(size)
+        fraction = np.zeros(size)
+        mass = np.zeros(size)
+        densest_point = np.zeros((3,size))
+        COM = np.zeros((6,size))
+        fraction[0] = 1.
+
+        for i, lvl in enumerate(sorted(self.levels, reverse=True)):
+            if len(self.levels[lvl]) == 0:  # lineage for this halo ends
+                cycle = cycle[:i]           # so truncate arrays, and break
+                redshift = redshift[:i]     # Not big enough.
+                halo_id = halo_id[:i]
+                fraction = fraction[:i]
+                mass = mass[:i]
+                densest_point = densest_point[:,:i]
+                COM = COM[:,:i]
+                break   
+            if lvl not in self.redshifts: continue
+            mylog.info("========== Cycle %5.5d (z=%f) ==========" % \
+                  (lvl, self.redshifts[lvl]))
+            cycle[i] = lvl 
+            redshift[i] = self.redshifts[lvl]
+
+            br = self.levels[lvl][0]
+            mylog.info("Parent halo = %d" % br.halo_id)
+            mylog.info("-> Most massive progenitor == Halo %d" % (br.progenitor))
+            halo_id[i] = br.halo_id
+
+            if len(br.children) == 0:     # lineage for this halo ends 
+                cycle = cycle[:i+1]       # (no children)
+                redshift = redshift[:i+1] # so truncate arrays, and break
+                halo_id = halo_id[:i+1]
+                fraction = fraction[:i+1]
+                mass = mass[:i+1]
+                densest_point = densest_point[:,:i+1]
+                COM = COM[:,:i+1]
+                break   
+
+            if i < size-1:
+                fraction[i+1] = br.children[0][1]  
+
+            # open up FOF file to parse for details
+            filename = "%s/groups_%05d.txt" % (self.FOF_directory, lvl)
+            mass[i], densest_point[:,i], COM[:,i] = \
+                    grab_FOF_halo_info_internal(filename, br.halo_id)
+
+        # save the arrays in the hdf5 file
+        g.create_dataset("cycle", data=cycle)
+        g.create_dataset("redshift", data=redshift)
+        g.create_dataset("halo_id", data=halo_id)
+        g.create_dataset("fraction", data=fraction)
+        g.create_dataset("mass", data=mass)
+        g.create_dataset("densest_point", data=densest_point)
+        g.create_dataset("COM", data=COM)
+        f.close()
+
+    def write_dot(self, filename=None):
+        r"""Writes merger tree to a GraphViz or image file.
+
+        Parameters
+        ----------
+        filename : str, optional
+            Filename to write the GraphViz file.  Default will be
+            tree_halo%05i.gv, which is a text file in the GraphViz format.
+            If filename is an image (e.g. "MergerTree.png"), the output will
+            be written in the corresponding image format by calling GraphViz
+            automatically.  See the GraphViz documentation (e.g. "dot -v")
+            for a list of available output formats.
+        """
+        if filename is None:
+            filename = "%s/tree_halo%5.5d.gv" % \
+                        (self.FOF_directory, self.halonum)
+        # Create the pydot graph object.
+        self.graph = pydot.Dot('galaxy', graph_type='digraph')
+        self.halo_shape = "rect"
+        self.z_shape = "plaintext"
+        # Subgraphs to align levels
+        self.subgs = {}
+        for num in self.numbers:
+            self.subgs[num] = pydot.Subgraph('', rank = 'same')
+            self.graph.add_subgraph(self.subgs[num])
+        sorted_lvl = sorted(self.levels, reverse=True)
+        for ii,lvl in enumerate(sorted_lvl):
+            # Since we get the cycle number from the key, it won't
+            # exist for the last level, i.e. children of last level.
+            # Get it from self.numbers.
+            if ii < len(sorted_lvl)-1:
+                next_lvl = sorted_lvl[ii+1]
+            else:
+                next_lvl = self.numbers[0]
+            for br in self.levels[lvl]:
+                for c in br.children:
+                    color = "red" if c[0] == br.progenitor else "black"
+                    self.graph.add_edge(pydot.Edge("C%d_H%d" %(lvl, br.halo_id),
+                        "C%d_H%d" % (next_lvl, c[0]), color=color))
+                    #line = "    C%d_H%d -> C%d_H%d [color=%s];\n" % \
+                    #      (lvl, br.halo_id, next_lvl, c[0], color)
+                    
+                    #fp.write(line)
+                    last_level = (ii,lvl)
+        for ii,lvl in enumerate(sorted_lvl):
+            npart_max = 0
+            for br in self.levels[lvl]:
+                if br.npart > npart_max: npart_max = br.npart
+            for br in self.levels[lvl]:
+                halo_str = "C%d_H%d" % (lvl, br.halo_id)
+                style = "filled" if br.npart == npart_max else "solid"
+                self.graph.add_node(pydot.Node(halo_str,
+                label = "Halo %d\\n%d particles" % (br.halo_id, br.npart),
+                style = style, shape = self.halo_shape))
+                # Add this node to the correct level subgraph.
+                self.subgs[lvl].add_node(pydot.Node(halo_str))
+        for lvl in self.numbers:
+            # Don't add the z if there are no halos already in the subgraph.
+            if len(self.subgs[lvl].get_node_list()) == 0: continue
+            self.subgs[lvl].add_node(pydot.Node("%1.5e" % self.redshifts[lvl],
+                shape = self.z_shape, label = "z=%0.3f" % self.redshifts[lvl]))
+        # Based on the suffix of the file name, write out the result to a file.
+        suffix = filename.split(".")[-1]
+        if suffix == "gv": suffix = "raw"
+        mylog.info("Writing %s format %s to disk." % (suffix, filename))
+        self.graph.write("%s" % filename, format=suffix)
+
+def find_halo_relationships(output1_id, output2_id, output_basename = None,
+                            radius = 0.10, external_FOF=True, 
+                            FOF_directory='FOF'):
+    r"""Calculate the parentage and child relationships between two EnzoFOF
+    halo catalogs.
+
+    This function performs a very simple merger tree calculation between two
+    sets of halos.  For every halo in the second halo catalog, it looks to the
+    first halo catalog to find the parents by looking at particle IDs.  The
+    particle IDs from the child halos are identified in potential parents, and
+    then both percent-of-parent and percent-to-child values are recorded.
+
+    Note that this works with catalogs constructed by Enzo's FOF halo
+    finder when external_FOF=True, and with catalogs constructed by yt's
+    FOF halo finder when external_FOF=False.
+
+    Parameters
+    ----------
+    output1_id : int
+        This is the integer output id of the (first) halo catalog to parse and
+        load.
+    output2_id : int
+        This is the integer output id of the (second) halo catalog to parse and
+        load.
+    output_basename : string
+        If provided, both .cpkl and .txt files containing the parentage
+        relationships will be output.
+    radius : float, optional
+        In absolute units, the radius to examine when guessing possible
+        parent/child relationships.  If this value is too small, you will
+        miss possible relationships.  Default = 0.10.
+    external_FOF : bool, optional
+        Are we building a tree from outputs generated by an
+        external FOF program, or an FOF internal to yt?
+    FOF_directory : str, optional
+        Directory where FOF files are located
+
+    Returns
+    -------
+    pfrac : dict
+        This is a dict of dicts.  The first key is the parent halo id, the
+        second is the child halo id.  The values are the percent contributed
+        from parent to child and the percent of a child that came from the
+        parent.
+    """
+    mylog.info("Parsing Halo Catalog %04i", output1_id)
+    HC1 = HaloCatalog(output1_id, False, external_FOF=external_FOF, \
+                      FOF_directory=FOF_directory)
+    mylog.info("Parsing Halo Catalog %04i", output2_id)
+    HC2 = HaloCatalog(output2_id, True, external_FOF=external_FOF, \
+                      FOF_directory=FOF_directory)
+    mylog.info("Calculating fractions")
+    pfrac = HC1.calculate_parentage_fractions(HC2)
+
+    if output_basename is not None and pfrac != {}:
+        f = open("%s.txt" % (output_basename), "w")
+        for hid1 in sorted(pfrac):
+            for hid2 in sorted(pfrac[hid1]):
+                if not str(hid2).isdigit(): continue
+                p1, p2, npart = pfrac[hid1][hid2]
+                if p1 == 0.0: continue
+                f.write( "Halo %s (%s) contributed %0.3e of its particles to %s (%s), which makes up %0.3e of that halo\n" % (
+                            hid1, output1_id, p2, hid2, output2_id, p1))
+        f.close()
+
+        cPickle.dump(pfrac, open("%s.cpkl" % (output_basename), "wb"))
+
+    return HC1.redshift, HC2.redshift, pfrac
+
+def grab_FOF_halo_info_internal(filename, halo_id):
+    """
+    Finds a specific halo's information in the FOF group output and passes
+    the relevant parameters back to the caller.
+    """
+    # open up FOF file to parse for details
+    groups_file = open(filename, 'r')
+    for line in groups_file:
+        if line.startswith("#"): continue
+        if int(line.split()[0]) == halo_id:
+            ar = np.array(line.split()).astype('float64')
+            return ar[1], ar[4:7], ar[7:13]  # mass, xyz_dens, xyzvxvyvz_COM
+
+def plot_halo_evolution(filename, halo_id, x_quantity='cycle', y_quantity='mass',
+                        x_log=False, y_log=True, FOF_directory='FOF'):
+    """
+    Once you have generated a file using the
+    EnzoFOFMergerTree.save_halo_evolution function, this is a simple way of
+    plotting the evolution of that halo's quantities over its lifetime.
+
+    Parameters
+    ----------
+    filename : str
+        The filename to which you saved the hdf5 data from save_halo_evolution
+    halo_id : int
+        The halo in 'filename' that you want to follow
+    x_quantity, y_quantity : str, optional
+        The quantities to plot on the x and y axes, respectively.
+        Valid options are:
+
+           * cycle
+           * mass
+           * fraction
+           * halo_id
+           * redshift
+           * dense_x
+           * dense_y
+           * dense_z
+           * COM_x
+           * COM_y
+           * COM_z
+           * COM_vx
+           * COM_vy
+           * COM_vz
+
+    x_log, y_log : bool, optional
+        Whether to use a logarithmic scale for the x (y) axis.
+    FOF_directory : str, optional
+        Directory where the FOF files (and the HDF5 file) are located
+
+    Examples
+    --------
+
+    >>> # generates mass history plots for the 20 most massive halos at t_fin.
+    >>> ts = DatasetSeries.from_filenames("DD????/DD????")
+    >>> # long step--must run FOF on each DD, but saves outputs for later use
+    >>> for ds in ts:   
+    ...     halo_list = FOFHaloFinder(ds)
+    ...     i = int(ds.basename[2:])
+    ...     halo_list.write_out("FOF/groups_%05i.txt" % i)
+    ...     halo_list.write_particle_lists("FOF/particles_%05i" % i)
+    ...
+    >>> mt = EnzoFOFMergerTree(external_FOF=False)
+    >>> for i in range(20):
+    ...     mt.build_tree(i)
+    ...     mt.save_halo_evolution('halos.h5')
+    ...
+    >>> for i in range(20):
+    ...     plot_halo_evolution('halos.h5', i)
+    """
+    import matplotlib.pyplot as plt
+    f = h5py.File("%s/%s" % (FOF_directory, filename), 'r')
+    basename = os.path.splitext(filename)[0]
+    halo = "halo%05d" % halo_id
+    basename = basename + "_" + halo
+    g = f[halo]
+    values = list(g)
+    index_dict = {'x' : 0, 'y' : 1, 'z' : 2, 'vx' : 3, 'vy' : 4, 'vz' : 5}
+    coords = {}
+    fields = {}
+    for i, quantity in enumerate((x_quantity, y_quantity)):
+        field = quantity
+        if quantity.startswith('COM'):
+            index = index_dict[quantity.split('_')[-1]]
+            quantity = ('COM')
+        if quantity.startswith('dense'):
+            index = index_dict[quantity.split('_')[-1]]
+            quantity = ('densest_point')
+        if quantity not in values:
+            exit('%s not in list of values in %s for halo %d' % \
+                 (quantity, filename, halo_id))
+        if not field == quantity:
+            coords[i] = g[quantity][index,:]
+        else:
+            coords[i] = g[quantity]
+        if len(coords[i]) == 1:
+            mylog.info("Only 1 value for Halo %d.  Ignoring." % halo_id)
+            return
+        fields[i] = field
+
+    ax = plt.axes()
+    ax.plot(coords[0], coords[1])
+    ax.set_title(basename)
+    ax.set_xlabel(fields[0])
+    ax.set_ylabel(fields[1])
+    if x_log:
+        ax.set_xscale("log")
+    if y_log:
+        ax.set_yscale("log")
+    ofn = "%s/%s_%s_%s.png" % (FOF_directory, basename, fields[0], fields[1])
+    plt.savefig(ofn)
+    plt.clf()

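For reference, a minimal sketch of driving the merger-tree relationships added
above directly; the output numbers and directory are assumptions, and the FOF
catalogs plus particle lists must already exist (e.g. as written by
FOFHaloFinder, as in the plot_halo_evolution docstring):

from yt.analysis_modules.halo_analysis.enzofof_merger_tree import \
    find_halo_relationships

# Relate two yt-written FOF catalogs: FOF/groups_00011.txt vs. groups_00010.txt
# (plus the matching FOF/particles_000NN.h5 files).
z_new, z_old, pfrac = find_halo_relationships(11, 10, external_FOF=False,
                                              FOF_directory="FOF")

# pfrac[hid1][hid2] = (fraction of halo hid2 built from hid1's particles,
#                      fraction of hid1's particles passed to hid2,
#                      particle count of hid2)
for hid1 in sorted(pfrac):
    for hid2 in sorted(k for k in pfrac[hid1] if str(k).isdigit()):
        frac_of_other, frac_of_mine, npart_other = pfrac[hid1][hid2]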
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_analysis/halo_filters.py
--- a/yt/analysis_modules/halo_analysis/halo_filters.py
+++ b/yt/analysis_modules/halo_analysis/halo_filters.py
@@ -66,7 +66,7 @@
 
 add_filter("quantity_value", quantity_value)
 
-def _not_subhalo(halo, field_type="halos"):
+def not_subhalo(halo, field_type="halos"):
     """
     Only return true if this halo is not a subhalo.
     
@@ -76,11 +76,11 @@
 
     if not hasattr(halo.halo_catalog, "parent_dict"):
         halo.halo_catalog.parent_dict = \
-          create_parent_dict(halo.halo_catalog.data_source, ptype=field_type)
+          _create_parent_dict(halo.halo_catalog.data_source, ptype=field_type)
     return halo.halo_catalog.parent_dict[int(halo.quantities["particle_identifier"])] == -1
-add_filter("not_subhalo", _not_subhalo)
+add_filter("not_subhalo", not_subhalo)
 
-def create_parent_dict(data_source, ptype="halos"):
+def _create_parent_dict(data_source, ptype="halos"):
     """
     Create a dictionary of halo parents to allow for filtering of subhalos.
 

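The rename above makes not_subhalo the public filter callable while keeping the
registered name unchanged.  A minimal usage sketch through the HaloCatalog,
assuming a halo catalog dataset that provides parent/subhalo information (the
path is a placeholder):

import yt
from yt.analysis_modules.halo_analysis.api import HaloCatalog

halos_ds = yt.load("rockstar_halos/halos_0.0.bin")   # placeholder catalog

hc = HaloCatalog(halos_ds=halos_ds)
hc.add_filter("not_subhalo")   # uses the name registered via add_filter above
hc.create()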
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_finding/api.py
--- a/yt/analysis_modules/halo_finding/api.py
+++ b/yt/analysis_modules/halo_finding/api.py
@@ -16,16 +16,13 @@
 from .halo_objects import \
     Halo, \
     HOPHalo, \
-    parallelHOPHalo, \
     LoadedHalo, \
     FOFHalo, \
     HaloList, \
     HOPHaloList, \
     FOFHaloList, \
-    parallelHOPHaloList, \
     LoadedHaloList, \
     GenericHaloFinder, \
-    parallelHF, \
     HOPHaloFinder, \
     FOFHaloFinder, \
     HaloFinder, \

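With parallelHF removed from this api module, the finders that remain directly
importable are HOPHaloFinder, FOFHaloFinder, and the Rockstar wrapper.  A
minimal sketch of direct FOF use, consistent with the docstring examples in
enzofof_merger_tree.py above (the dataset path is a placeholder); the
HaloCatalog route remains the recommended interface:

import yt
from yt.analysis_modules.halo_finding.api import FOFHaloFinder

ds = yt.load("DD0064/DD0064")                       # placeholder dataset
halos = FOFHaloFinder(ds)
halos.write_out("FOF/groups_00064.txt")             # text catalog
halos.write_particle_lists("FOF/particles_00064")   # HDF5 particle IDs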
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_finding/halo_objects.py
--- a/yt/analysis_modules/halo_finding/halo_objects.py
+++ b/yt/analysis_modules/halo_finding/halo_objects.py
@@ -26,12 +26,14 @@
 from collections import defaultdict
 from yt.extern.six import add_metaclass
 
-from yt.funcs import *
-
 from yt.config import ytcfg
+from yt.funcs import mylog
 from yt.utilities.performance_counters import \
-    yt_counters, time_function
-from yt.utilities.math_utils import periodic_dist, get_rotation_matrix
+    time_function, \
+    yt_counters
+from yt.utilities.math_utils import \
+    get_rotation_matrix, \
+    periodic_dist
 from yt.utilities.physical_constants import \
     mass_sun_cgs, \
     TINY
@@ -40,11 +42,6 @@
     
 from .hop.EnzoHop import RunHOP
 from .fof.EnzoFOF import RunFOF
-try:
-    from parallel_hop.parallel_hop_interface import \
-        ParallelHOPHaloFinder
-except ImportError:
-    mylog.debug("Parallel HOP not imported.")
 
 from yt.utilities.parallel_tools.parallel_analysis_interface import \
     ParallelDummy, \
@@ -447,7 +444,8 @@
         Msun2g = mass_sun_cgs
         rho_crit = rho_crit * ((1.0 + z) ** 3.0)
         # Get some pertinent information about the halo.
-        self.mass_bins = YTArray(np.zeros(self.bin_count + 1, dtype='float64'),'Msun')
+        self.mass_bins = self.ds.arr(np.zeros(self.bin_count + 1, 
+                                              dtype='float64'),'Msun')
         dist = np.empty(thissize, dtype='float64')
         cen = self.center_of_mass()
         mark = 0
@@ -694,113 +692,6 @@
     pass
 
 
-class parallelHOPHalo(Halo, ParallelAnalysisInterface):
-    dont_wrap = ["maximum_density", "maximum_density_location",
-        "center_of_mass", "total_mass", "bulk_velocity", "maximum_radius",
-        "get_size", "get_sphere", "write_particle_list", "__getitem__",
-        "virial_info", "virial_bin", "virial_mass", "virial_radius",
-        "rms_velocity"]
-
-    def virial_info(self, bins=300):
-        r"""Calculates the virial information for the halo. Generally, it is
-        better to call virial_radius or virial_mass instead, which calls this
-        function automatically.
-        """
-        # Skip if we've already calculated for this number of bins.
-        if self.bin_count == bins and self.overdensity is not None:
-            return None
-        # Do this for all because all will use it.
-        self.bin_count = bins
-        period = self.data.ds.domain_right_edge - \
-            self.data.ds.domain_left_edge
-        self.mass_bins = np.zeros(self.bin_count + 1, dtype='float64')
-        cen = self.center_of_mass()
-        # Cosmology
-        h = self.data.ds.hubble_constant
-        Om_matter = self.data.ds.omega_matter
-        z = self.data.ds.current_redshift
-        rho_crit = rho_crit_g_cm3_h2 * h ** 2.0 * Om_matter  # g cm^-3
-        Msun2g = mass_sun_cgs
-        rho_crit = rho_crit * ((1.0 + z) ** 3.0)
-        # If I own some of this halo operate on the particles.
-        if self.indices is not None:
-            # Get some pertinent information about the halo.
-            dist = np.empty(self.indices.size, dtype='float64')
-            mark = 0
-            # Find the distances to the particles.
-            # I don't like this much, but I
-            # can't see a way to eliminate a loop like this, either here or in
-            # yt.math_utils.
-            for pos in itertools.izip(self["particle_position_x"],
-                    self["particle_position_y"], self["particle_position_z"]):
-                dist[mark] = periodic_dist(cen, pos, period)
-                mark += 1
-            dist_min, dist_max = min(dist), max(dist)
-        # If I don't have this halo, make some dummy values.
-        else:
-            dist_min = max(period)
-            dist_max = 0.0
-        # In this parallel case, we're going to find the global dist extrema
-        # and built identical bins on all tasks.
-        dist_min = self.comm.mpi_allreduce(dist_min, op='min')
-        dist_max = self.comm.mpi_allreduce(dist_max, op='max')
-        # Set up the radial bins.
-        # Multiply min and max to prevent issues with digitize below.
-        self.radial_bins = np.logspace(math.log10(dist_min * .99 + TINY),
-            math.log10(dist_max * 1.01 + 2 * TINY), num=self.bin_count + 1)
-        self.radial_bins = ds.arr(self.radial_bins, 'code_length')
-
-        if self.indices is not None and self.indices.size > 1:
-            # Find out which bin each particle goes into, and add the particle
-            # mass to that bin.
-            inds = np.digitize(dist, self.radial_bins) - 1
-            for index in np.unique(inds):
-                self.mass_bins[index] += \
-                    np.sum(self["particle_mass"][inds == index]).in_units('Msun')
-            # Now forward sum the masses in the bins.
-            for i in xrange(self.bin_count):
-                self.mass_bins[i + 1] += self.mass_bins[i]
-        # Sum up the mass_bins globally
-        self.mass_bins = self.comm.mpi_allreduce(self.mass_bins, op='sum')
-        # Calculate the over densities in the bins.
-        self.overdensity = self.mass_bins * Msun2g / \
-        (4. / 3. * math.pi * rho_crit * \
-        (self.radial_bins) ** 3.0)
-
-    def _get_ellipsoid_parameters_basic(self):
-        mylog.error("Ellipsoid calculation does not work for parallelHF halos." + \
-        " Please save the halos using .dump(), and reload them using" + \
-        " LoadHaloes() to use this function.")
-        return None
-
-    def get_ellipsoid_parameters(self):
-        r"""Calculate the parameters that describe the ellipsoid of
-        the particles that constitute the halo.
-        
-        Parameters
-        ----------
-        None
-        
-        Returns
-        -------
-        tuple : (cm, mag_A, mag_B, mag_C, e0_vector, tilt)
-            The 6-tuple has in order:
-              #. The center of mass as an array.
-              #. mag_A as a float.
-              #. mag_B as a float.
-              #. mag_C as a float.
-              #. e0_vector as an array.
-              #. tilt as a float.
-        
-        Examples
-        --------
-        >>> params = halos[0].get_ellipsoid_parameters()
-        """
-        mylog.error("get_ellipsoid_parameters does not work for parallelHF halos." + \
-        " Please save the halos using .dump(), and reload them using" + \
-        " LoadHaloes() to use this function.")
-        return None
-
 class FOFHalo(Halo):
 
     def maximum_density(self):
@@ -1565,254 +1456,6 @@
                 CoM = cen, max_radius = r, supp = temp_dict))
             halo += 1
 
-
-class parallelHOPHaloList(HaloList, ParallelAnalysisInterface):
-    """
-    Run hop on *data_source* with a given density *threshold*.  If
-    *dm_only* is True (default), only run it on the dark matter particles, otherwise
-    on all particles.  Returns an iterable collection of *HopGroup* items.
-    """
-    _name = "parallelHOP"
-    _halo_class = parallelHOPHalo
-    _fields = ["particle_position_%s" % ax for ax in 'xyz'] + \
-              ["particle_mass", "particle_index"]
-
-    def __init__(self, data_source, padding, num_neighbors, bounds, total_mass,
-        period, threshold=160.0, dm_only=True, rearrange=True, premerge=True,
-        tree='F'):
-        ParallelAnalysisInterface.__init__(self)
-        self.threshold = threshold
-        self.num_neighbors = num_neighbors
-        self.bounds = bounds
-        self.total_mass = total_mass
-        self.rearrange = rearrange
-        self.period = period
-        self.old_period = period.copy()
-        self.period = np.array([1.] * 3)
-        self._data_source = data_source
-        self.premerge = premerge
-        self.tree = tree
-        mylog.info("Initializing HOP")
-        HaloList.__init__(self, data_source, dm_only)
-
-    def _run_finder(self):
-        yt_counters("Reading Data")
-        # Test to make sure the particle IDs aren't suspicious.
-        exit = False
-        if (self.particle_fields["particle_index"] < 0).any():
-            mylog.error("Negative values in particle_index field. Parallel HOP will fail.")
-            exit = True
-        if np.unique(self.particle_fields["particle_index"]).size != \
-                self.particle_fields["particle_index"].size:
-            mylog.error("Non-unique values in particle_index field. Parallel HOP will fail.")
-            exit = True
-
-        self.comm.mpi_exit_test(exit)
-        # Try to do this in a memory conservative way.
-        np.divide(self.particle_fields['particle_mass'].in_units('Msun'), self.total_mass,
-            self.particle_fields['particle_mass'])
-        np.divide(self.particle_fields["particle_position_x"],
-            self.old_period[0], self.particle_fields["particle_position_x"])
-        np.divide(self.particle_fields["particle_position_y"],
-            self.old_period[1], self.particle_fields["particle_position_y"])
-        np.divide(self.particle_fields["particle_position_z"],
-            self.old_period[2], self.particle_fields["particle_position_z"])
-        obj = ParallelHOPHaloFinder(self.period, self.padding,
-            self.num_neighbors, self.bounds,
-            self.particle_fields,
-            self.threshold, rearrange=self.rearrange, premerge=self.premerge,
-            tree=self.tree)
-        self.densities, self.tags = obj.density, obj.chainID
-        # I'm going to go ahead and delete self.densities because it's not
-        # actually being used. I'm not going to remove it altogether because
-        # it may be useful to someone someday.
-        del self.densities
-        self.group_count = obj.group_count
-        self.group_sizes = obj.group_sizes
-        if self.group_count == 0:
-            mylog.info("There are no halos found.")
-            return
-        self.CoM = obj.CoM
-        self.Tot_M = obj.Tot_M * self.total_mass
-        self.max_dens_point = obj.max_dens_point
-        self.max_radius = obj.max_radius
-        for dd in range(3):
-            self.CoM[:, dd] *= self.old_period[dd]
-            self.max_dens_point[:, dd + 1] *= self.old_period[dd]
-        # This is wrong, below, with uneven boundaries. We'll cross that bridge
-        # when we get there.
-        self.max_radius *= self.old_period[0]
-        self.period = self.old_period.copy()
-        # Precompute the bulk velocity in parallel.
-        yt_counters("Precomp bulk vel.")
-        self.bulk_vel = np.zeros((self.group_count, 3), dtype='float64')
-        yt_counters("bulk vel. reading data")
-        pm = obj.mass
-        # Fix this back to un-normalized units.
-        np.multiply(pm, self.total_mass, pm)
-        xv = self._data_source["particle_velocity_x"][self._base_indices]
-        yv = self._data_source["particle_velocity_y"][self._base_indices]
-        zv = self._data_source["particle_velocity_z"][self._base_indices]
-        yt_counters("bulk vel. reading data")
-        yt_counters("bulk vel. computing")
-        select = (self.tags >= 0)
-        calc = len(np.where(select == True)[0])
-        if calc:
-            vel = np.empty((calc, 3), dtype='float64')
-            ms = pm[select]
-            vel[:, 0] = xv[select] * ms
-            vel[:, 1] = yv[select] * ms
-            vel[:, 2] = zv[select] * ms
-            subchain = self.tags[select]
-            sort = subchain.argsort()
-            vel = vel[sort]
-            sort_subchain = subchain[sort]
-            uniq_subchain = np.unique(sort_subchain)
-            diff_subchain = np.ediff1d(sort_subchain)
-            marks = (diff_subchain > 0)
-            marks = np.arange(calc)[marks] + 1
-            marks = np.concatenate(([0], marks, [calc]))
-            for i, u in enumerate(uniq_subchain):
-                self.bulk_vel[u] = np.sum(vel[marks[i]:marks[i + 1]], axis=0)
-            del vel, subchain, sort_subchain
-            del diff_subchain
-        # Bring it together, and divide by the previously computed total mass
-        # of each halo.
-        self.bulk_vel = self.comm.mpi_allreduce(self.bulk_vel, op='sum')
-        for groupID in xrange(self.group_count):
-            self.bulk_vel[groupID] = \
-                self.bulk_vel[groupID] / self.Tot_M[groupID]
-        yt_counters("bulk vel. computing")
-        # Now calculate the RMS velocity of the groups in parallel, very
-        # similarly to the bulk velocity and re-using some of the arrays.
-        yt_counters("rms vel computing")
-        rms_vel_temp = np.zeros((self.group_count, 2), dtype='float64')
-        if calc:
-            vel = np.empty((calc, 3), dtype='float64')
-            vel[:, 0] = xv[select] * ms
-            vel[:, 1] = yv[select] * ms
-            vel[:, 2] = zv[select] * ms
-            vel = vel[sort]
-            for i, u in enumerate(uniq_subchain):
-                # This finds the sum locally.
-                rms_vel_temp[u][0] = np.sum(((vel[marks[i]:marks[i + 1]] - \
-                    self.bulk_vel[u]) / self.Tot_M[u]) ** 2.)
-                # I could use self.group_sizes...
-                rms_vel_temp[u][1] = marks[i + 1] - marks[i]
-            del vel, marks, uniq_subchain
-        # Bring it together.
-        rms_vel_temp = self.comm.mpi_allreduce(rms_vel_temp, op='sum')
-        self.rms_vel = np.empty(self.group_count, dtype='float64')
-        for groupID in xrange(self.group_count):
-            # Here we do the Mean and the Root.
-            self.rms_vel[groupID] = \
-                np.sqrt(rms_vel_temp[groupID][0] / rms_vel_temp[groupID][1]) * \
-                self.group_sizes[groupID]
-        del rms_vel_temp
-        yt_counters("rms vel computing")
-        self.taskID = obj.mine
-        self.halo_taskmap = obj.halo_taskmap  # A defaultdict.
-        del obj
-        gc.collect()
-        yt_counters("Precomp bulk vel.")
-
-    def _parse_output(self):
-        yt_counters("Final Grouping")
-        """
-        Each task will make an entry for all groups, but it may be empty.
-        """
-        unique_ids = np.unique(self.tags)
-        counts = np.bincount((self.tags + 1).tolist())
-        sort_indices = np.argsort(self.tags)
-        grab_indices = np.indices(self.tags.shape).ravel()[sort_indices]
-        del sort_indices
-        cp = 0
-        index = 0
-        # We want arrays for parallel HOP
-        self._groups = np.empty(self.group_count, dtype='object')
-        self._max_dens = np.empty((self.group_count, 4), dtype='float64')
-        if self.group_count == 0:
-            mylog.info("There are no halos found.")
-            return
-        for i in unique_ids:
-            if i == -1:
-                cp += counts[i + 1]
-                continue
-            # If there is a gap in the unique_ids, make empty groups to
-            # fill it in.
-            while index < i:
-                self._groups[index] = self._halo_class(self, index, \
-                    size=self.group_sizes[index], CoM=self.CoM[index], \
-                    max_dens_point=self.max_dens_point[index], \
-                    group_total_mass=self.Tot_M[index],
-                    max_radius=self.max_radius[index],
-                    bulk_vel=self.bulk_vel[index],
-                    tasks=self.halo_taskmap[index],
-                    rms_vel=self.rms_vel[index])
-                # I don't own this halo
-                self.comm.do_not_claim_object(self._groups[index])
-                self._max_dens[index] = [self.max_dens_point[index][0],
-                    self.max_dens_point[index][1], \
-                    self.max_dens_point[index][2],
-                    self.max_dens_point[index][3]]
-                index += 1
-            cp_c = cp + counts[i + 1]
-            group_indices = grab_indices[cp:cp_c]
-            self._groups[index] = self._halo_class(self, i, group_indices, \
-                size=self.group_sizes[i], CoM=self.CoM[i], \
-                max_dens_point=self.max_dens_point[i], \
-                group_total_mass=self.Tot_M[i], max_radius=self.max_radius[i],
-                bulk_vel=self.bulk_vel[i], tasks=self.halo_taskmap[index],
-                rms_vel=self.rms_vel[i])
-            # This halo may be owned by many, including this task
-            self.comm.claim_object(self._groups[index])
-            self._max_dens[index] = [self.max_dens_point[i][0],
-                self.max_dens_point[i][1], \
-                self.max_dens_point[i][2], self.max_dens_point[i][3]]
-            cp += counts[i + 1]
-            index += 1
-        # If there are missing groups at the end, add them.
-        while index < self.group_count:
-            self._groups[index] = self._halo_class(self, index, \
-                size=self.group_sizes[index], CoM=self.CoM[index], \
-                max_dens_point=self.max_dens_point[index], \
-                group_total_mass=self.Tot_M[index],
-                max_radius=self.max_radius[index],
-                bulk_vel=self.bulk_vel[index], tasks=self.halo_taskmap[index],
-                rms_vel=self.rms_vel[index])
-            self.comm.do_not_claim_object(self._groups[index])
-            self._max_dens[index] = [self.max_dens_point[index][0],
-                self.max_dens_point[index][1], \
-                self.max_dens_point[index][2], self.max_dens_point[index][3]]
-            index += 1
-        # Clean up
-        del self.max_dens_point, self.max_radius, self.bulk_vel
-        del self.halo_taskmap, self.tags, self.rms_vel
-        del grab_indices, unique_ids, counts
-        try:
-            del group_indices
-        except UnboundLocalError:
-            pass
-
-    def __len__(self):
-        return self.group_count
-
-    def write_out(self, filename="parallelHopAnalysis.out", ellipsoid_data=False):
-        r"""Write out standard halo information to a text file.
-
-        Parameters
-        ----------
-        filename : str
-            The name of the file to write to.
-            Default = "parallelHopAnalysis.out".
-        ellipsoid_data : bool
-            Whether to also write out the ellipsoidal halo information.
-            Default = False.
-
-        Examples
-        --------
-        >>> halos.write_out("parallelHopAnalysis.out")
-        """
-        HaloList.write_out(self, filename, ellipsoid_data)
-
-
 class GenericHaloFinder(HaloList, ParallelAnalysisInterface):
     def __init__(self, ds, data_source, dm_only=True, padding=0.0):
         ParallelAnalysisInterface.__init__(self)
@@ -2007,334 +1650,6 @@
         self.write_particle_lists(basename)
         self.write_particle_lists_txt(basename)
 
-
-class parallelHF(GenericHaloFinder, parallelHOPHaloList):
-    r"""Parallel HOP halo finder.
-
-    Halos are built by:
-    1. Calculating a density for each particle based on a smoothing kernel.
-    2. Recursively linking particles to other particles, working from lower
-    density particles to higher.
-    3. Identifying geometrically proximate chains of particles.
-    4. Merging those chains into final halos following merging rules.
-
-    Lower thresholds generally produce more halos, and the largest halos
-    become larger. Also, halos become more filamentary and over-connected.
-
-    This is very similar to HOP, but it does not produce precisely the
-    same halos due to unavoidable numerical differences.
-
-    Skory et al. "Parallel HOP: A Scalable Halo Finder for Massive
-    Cosmological Data Sets." arXiv (2010) 1001.3411
-
-    Parameters
-    ----------
-    ds : `Dataset`
-        The dataset on which halo finding will be conducted.
-    threshold : float
-        The density threshold used when building halos. Default = 160.0.
-    dm_only : bool
-        If True, only dark matter particles are used when building halos.
-        Default = True.
-    resize : bool
-        Turns load-balancing on or off. Default = True.
-    tree : string
-        Chooses which kD Tree to use. The Fortran one (tree = 'F') is
-        faster, but uses more memory. The Cython one (tree = 'C') is
-        slower but is more memory efficient.
-        Default = 'F'.
-    rearrange : bool
-        Turns on faster nearest neighbor searches at the cost of increased
-        memory usage.
-        This option only applies when using the Fortran tree.
-        Default = True.
-    fancy_padding : bool
-        If True, the padding is calculated independently for each face of
-        each subvolume. Default = True.
-    safety : float
-        Due to variations in inter-particle spacing in the volume, the
-        padding may need to be increased above the raw calculation.
-        The calculated padding is multiplied by this number; values
-        >1 increase the padding. Default = 1.5.
-    premerge : bool
-        True merges chains in two steps (rather than one with False), which
-        can speed up halo finding by 25% or more. However, True can result
-        in small (<<1%) variations in the final halo masses when compared
-        to False. Default = True.
-    sample : float
-        The fraction of the full dataset on which load-balancing is
-        performed. Default = 0.03.
-    total_mass : float
-        If HOP is run on the same dataset multiple times, the total mass
-        of particles in Msun units in the full volume can be supplied here
-        to save time.
-        This must correspond to the particles being operated on, meaning
-        if stars are included in the halo finding, they must be included
-        in this mass as well, and vice versa.
-        If halo finding is run on a subvolume, this still corresponds to the
-        mass in the entire volume.
-        Default = None, which means the total mass is automatically
-        calculated.
-    num_particles : integer
-        The total number of particles in the volume, counted in the same
-        fashion as `total_mass`. Specifying this turns off fancy_padding.
-        Default = None, which means the number of particles is
-        automatically calculated.
-
-    Examples
-    --------
-    >>> ds = load("RedshiftOutput0000")
-    >>> halos = parallelHF(ds)
-    """
-    def __init__(self, ds, subvolume=None, threshold=160, dm_only=True, \
-        resize=True, rearrange=True,\
-        fancy_padding=True, safety=1.5, premerge=True, sample=0.03, \
-        total_mass=None, num_particles=None, tree='F'):
-        if subvolume is not None:
-            ds_LE = np.array(subvolume.left_edge)
-            ds_RE = np.array(subvolume.right_edge)
-        self._data_source = ds.all_data()
-        GenericHaloFinder.__init__(self, ds, self._data_source, dm_only,
-            padding=0.0)
-        self.padding = 0.0
-        self.num_neighbors = 65
-        self.safety = safety
-        self.sample = sample
-        self.tree = tree
-        if self.tree not in ('F', 'C'):
-            mylog.error("Unrecognized kD Tree '%s'; expected 'F' or 'C'." % self.tree)
-        period = ds.domain_right_edge - ds.domain_left_edge
-        topbounds = np.array([[0., 0., 0.], period])
-        # Cut up the volume evenly initially, with no padding.
-        padded, LE, RE, self._data_source = \
-            self.partition_index_3d(ds=self._data_source,
-            padding=self.padding)
-        # also get the total mass of particles
-        yt_counters("Reading Data")
-        # Adaptive subregions by bisection. We do not load balance if we are
-        # analyzing a subvolume.
-        ds_names = ["particle_position_x", "particle_position_y",
-            "particle_position_z"]
-        if ytcfg.getboolean("yt", "inline") == False and \
-            resize and self.comm.size != 1 and subvolume is None:
-            random.seed(self.comm.rank)
-            cut_list = self.partition_index_3d_bisection_list()
-            root_points = self._subsample_points()
-            self.bucket_bounds = []
-            if self.comm.rank == 0:
-                self._recursive_divide(root_points, topbounds, 0, cut_list)
-            self.bucket_bounds = \
-                self.comm.mpi_bcast(self.bucket_bounds)
-            my_bounds = self.bucket_bounds[self.comm.rank]
-            LE, RE = my_bounds[0], my_bounds[1]
-            self._data_source = self.ds.region([0.] * 3, LE, RE)
-        # If this isn't parallel, define the region as an AMRRegionStrict so
-        # particle IO works.
-        if self.comm.size == 1:
-            self._data_source = self.ds.region([0.5] * 3,
-                LE, RE)
-        # get the average spacing between particles for this region
-        # The except is for the serial case where the full box is what we want.
-        if num_particles is None:
-            data = self._data_source["particle_position_x"]
-        try:
-            l = self._data_source.right_edge - self._data_source.left_edge
-        except AttributeError:
-            l = ds.domain_right_edge - ds.domain_left_edge
-        vol = l[0] * l[1] * l[2]
-        full_vol = vol
-        # We will use symmetric padding when a subvolume is being used.
-        if not fancy_padding or subvolume is not None or \
-                num_particles is not None:
-            if num_particles is None:
-                num_particles = data.size
-            avg_spacing = (float(vol) / num_particles) ** (1. / 3.)
-            # padding is a function of inter-particle spacing, this is an
-            # approximation, but it's OK with the safety factor
-            padding = (self.num_neighbors) ** (1. / 3.) * self.safety * \
-                avg_spacing
-            self.padding = (np.ones(3, dtype='float64') * padding,
-                np.ones(3, dtype='float64') * padding)
-            mylog.info('padding %s avg_spacing %f vol %f local_parts %d' % \
-                (str(self.padding), avg_spacing, vol, num_particles))
-        # Another approach to padding, perhaps more accurate.
-        elif fancy_padding and self._distributed:
-            LE_padding = np.empty(3, dtype='float64')
-            RE_padding = np.empty(3, dtype='float64')
-            avg_spacing = (vol / data.size) ** (1. / 3.)
-            base_padding = (self.num_neighbors) ** (1. / 3.) * self.safety * \
-                avg_spacing
-            for dim in xrange(3):
-                data = self._data_source[ds_names[dim]]
-                num_bins = 1000
-                width = self._data_source.right_edge[dim] - \
-                    self._data_source.left_edge[dim]
-                area = (self._data_source.right_edge[(dim + 1) % 3] - \
-                    self._data_source.left_edge[(dim + 1) % 3]) * \
-                    (self._data_source.right_edge[(dim + 2) % 3] - \
-                    self._data_source.left_edge[(dim + 2) % 3])
-                bin_width = base_padding
-                num_bins = int(math.ceil(width / bin_width))
-                bins = np.arange(num_bins + 1, dtype='float64') * bin_width + \
-                    self._data_source.left_edge[dim]
-                counts, bins = np.histogram(data, bins)
-                # left side.
-                start = 0
-                count = counts[0]
-                while count < self.num_neighbors:
-                    start += 1
-                    count += counts[start]
-                # Get the avg spacing in just this boundary.
-                vol = area * (bins[start + 1] - bins[0])
-                avg_spacing = (float(vol) / count) ** (1. / 3.)
-                LE_padding[dim] = (self.num_neighbors) ** (1. / 3.) * \
-                    self.safety * avg_spacing
-                # right side.
-                start = -1
-                count = counts[-1]
-                while count < self.num_neighbors:
-                    start -= 1
-                    count += counts[start]
-                vol = area * (bins[-1] - bins[start - 1])
-                avg_spacing = (float(vol) / count) ** (1. / 3.)
-                RE_padding[dim] = (self.num_neighbors) ** (1. / 3.) * \
-                    self.safety * avg_spacing
-            self.padding = (LE_padding, RE_padding)
-            del bins, counts
-            mylog.info('fancy_padding %s avg_spacing %f full_vol %f local_parts %d %s' % \
-                (str(self.padding), avg_spacing, full_vol,
-                data.size, str(self._data_source)))
-        # Now we get the full box mass after we have the final composition of
-        # subvolumes.
-        if total_mass is None:
-            total_mass = self.comm.mpi_allreduce((self._data_source["particle_mass"].in_units('Msun').astype('float64')).sum(),
-                                                 op='sum')
-        if not self._distributed:
-            self.padding = (np.zeros(3, dtype='float64'),
-                np.zeros(3, dtype='float64'))
-        # If we're using a subvolume, we now re-divide.
-        if subvolume is not None:
-            self._data_source = ds.region([0.] * 3, ds_LE, ds_RE)
-            # Cut up the volume.
-            padded, LE, RE, self._data_source = \
-                self.partition_index_3d(ds=self._data_source,
-                padding=0.)
-        self.bounds = (LE, RE)
-        (LE_padding, RE_padding) = self.padding
-        parallelHOPHaloList.__init__(self, self._data_source, self.padding, \
-        self.num_neighbors, self.bounds, total_mass, period, \
-        threshold=threshold, dm_only=dm_only, rearrange=rearrange,
-            premerge=premerge, tree=self.tree)
-        self._join_halolists()
-        yt_counters("Final Grouping")
-
-    def _subsample_points(self):
-        # Read in a random subset of the points in each domain, and then
-        # collect them on the root task.
-        xp = self._data_source["particle_position_x"]
-        n_parts = self.comm.mpi_allreduce(xp.size, op='sum')
-        local_parts = xp.size
-        random_points = int(self.sample * n_parts)
-        # We want to get a representative selection of random particles in
-        # each subvolume.
-        adjust = float(local_parts) / (float(n_parts) / self.comm.size)
-        n_random = int(adjust * float(random_points) / self.comm.size)
-        mylog.info("Reading in %d random particles." % n_random)
-        # Get unique random particles.
-        my_points = np.empty((n_random, 3), dtype='float64')
-        uni = np.array(random.sample(xrange(xp.size), n_random))
-        uni = uni[uni.argsort()]
-        my_points[:, 0] = xp[uni]
-        del xp
-        self._data_source.clear_data()
-        my_points[:, 1] = self._data_source["particle_position_y"][uni]
-        self._data_source.clear_data()
-        my_points[:, 2] = self._data_source["particle_position_z"][uni]
-        self._data_source.clear_data()
-        del uni
-        # Collect them on the root task.
-        mine, sizes = self.comm.mpi_info_dict(n_random)
-        if mine == 0:
-            tot_random = sum(sizes.values())
-            root_points = np.empty((tot_random, 3), dtype='float64')
-            root_points.shape = (1, 3 * tot_random)
-        else:
-            root_points = np.empty([])
-        my_points.shape = (1, n_random * 3)
-        root_points = self.comm.par_combine_object(my_points[0],
-                datatype="array", op="cat")
-        del my_points
-        if mine == 0:
-            root_points.shape = (tot_random, 3)
-        return root_points
-
-    def _recursive_divide(self, points, bounds, level, cut_list):
-        dim = cut_list[level][0]
-        parts = points.shape[0]
-        num_bins = 1000
-        width = bounds[1][dim] - bounds[0][dim]
-        bin_width = width / num_bins
-        bins = np.arange(num_bins + 1, dtype='float64') * bin_width + \
-            bounds[0][dim]
-        counts, bins = np.histogram(points[:, dim], bins)
-        # Find the bin that passes the cut points.
-        midpoints = [bounds[0][dim]]
-        csum = 0
-        bin_idx = 0
-        for step in xrange(1, cut_list[level][1]):
-            while csum < ((parts * step) / cut_list[level][1]):
-                lastsum = csum
-                csum += counts[bin_idx]
-                bin_idx += 1
-            # Bin edges
-            left_edge = bins[bin_idx - 1]
-            right_edge = bins[bin_idx]
-            # Find a better approx of the midpoint cut
-            # line using a linear approx.
-            a = float(csum - lastsum) / (right_edge - left_edge)
-            midpoints.append(left_edge + (0.5 - \
-                (float(lastsum) / parts / 2)) / a)
-        midpoints.append(bounds[1][dim])
-
-        # Split the points & update the bounds.
-        subpoints = []
-        subbounds = []
-        for pair in zip(midpoints[:-1], midpoints[1:]):
-            select = np.bitwise_and(points[:, dim] >= pair[0],
-                points[:, dim] < pair[1])
-            subpoints.append(points[select])
-            nb = bounds.copy()
-            nb[0][dim] = pair[0]
-            nb[1][dim] = pair[1]
-            subbounds.append(nb)
-        # If we're at the maxlevel, make a bucket. Otherwise, recurse down.
-        maxlevel = len(cut_list) - 1
-        for pair in zip(subpoints, subbounds):
-            if level == maxlevel:
-                self.bucket_bounds.append(pair[1])
-            else:
-                self._recursive_divide(pair[0], pair[1], level + 1, cut_list)
-
-    def _join_halolists(self):
-        if self.group_count == 0:
-            mylog.info("There are no halos found.")
-            return
-        ms = -self.Tot_M.copy()
-        del self.Tot_M
-        Cx = self.CoM[:, 0].copy()
-        sort_order = np.lexsort([Cx, ms])
-        del Cx, ms
-        self._groups = self._groups[sort_order]
-        self._max_dens = self._max_dens[sort_order]
-        for i in xrange(self.group_count):
-            self._groups[i].id = i
-        del sort_order, self.group_sizes, self.CoM
-
-
 class HOPHaloFinder(GenericHaloFinder, HOPHaloList):
     r"""HOP halo finder.
 

This diff is so big that we needed to truncate the remainder.
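
As a quick reference for the padding logic in the removed parallelHF code above: the padding applied to each subvolume scales with the local mean inter-particle spacing. Below is a minimal standalone sketch of that arithmetic, using purely illustrative values for the subvolume volume and particle count (nothing here is taken from a real dataset):

    # Illustrative sketch of the padding heuristic; num_neighbors and safety
    # mirror the defaults documented above, vol and num_particles are made up.
    num_neighbors = 65
    safety = 1.5
    vol = 1.0            # subvolume volume, in box units
    num_particles = 1e6  # local particle count

    # Mean inter-particle spacing, then a padding of roughly
    # num_neighbors**(1/3) spacings times the safety factor per face.
    avg_spacing = (vol / num_particles) ** (1.0 / 3.0)
    padding = num_neighbors ** (1.0 / 3.0) * safety * avg_spacing
    print(avg_spacing, padding)  # ~0.01 and ~0.06 for these numbers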
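
The removed _recursive_divide load balancer histograms a random sample of particle positions along one axis and chooses cut points so that each slab holds roughly the same number of particles (it then refines each cut with a linear interpolation inside the chosen bin). A toy sketch of the core idea follows; the function name, bin count, and uniform test data are hypothetical and not part of yt:

    import numpy as np

    def equal_count_cuts(x, n_pieces, n_bins=1000):
        # Histogram the sampled positions, then walk the cumulative counts
        # until each target fraction of the particles has been passed.
        counts, edges = np.histogram(x, bins=n_bins)
        cumulative = np.cumsum(counts)
        cuts = [edges[0]]
        for step in range(1, n_pieces):
            target = step * x.size / float(n_pieces)
            b = np.searchsorted(cumulative, target)
            cuts.append(edges[b + 1])
        cuts.append(edges[-1])
        return cuts

    x = np.random.random(10000)
    print(equal_count_cuts(x, 4))  # roughly [0.0, 0.25, 0.5, 0.75, 1.0]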

Repository URL: https://bitbucket.org/yt_analysis/yt/
