[yt-svn] commit/yt: 8 new changesets
commits-noreply at bitbucket.org
Tue Jul 29 13:42:58 PDT 2014
8 new commits in yt:
https://bitbucket.org/yt_analysis/yt/commits/759906f8eb86/
Changeset: 759906f8eb86
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-25 18:12:22
Summary: Adjusting function names for consistency.
Affected #: 1 file
diff -r d13ab03ca0f7ba973ee867b0cacfc434360d61ba -r 759906f8eb86097163bb6ee3fb21815a14ed5d27 yt/analysis_modules/halo_analysis/halo_filters.py
--- a/yt/analysis_modules/halo_analysis/halo_filters.py
+++ b/yt/analysis_modules/halo_analysis/halo_filters.py
@@ -66,7 +66,7 @@
add_filter("quantity_value", quantity_value)
-def _not_subhalo(halo, field_type="halos"):
+def not_subhalo(halo, field_type="halos"):
"""
Only return true if this halo is not a subhalo.
@@ -76,11 +76,11 @@
if not hasattr(halo.halo_catalog, "parent_dict"):
halo.halo_catalog.parent_dict = \
- create_parent_dict(halo.halo_catalog.data_source, ptype=field_type)
+ _create_parent_dict(halo.halo_catalog.data_source, ptype=field_type)
return halo.halo_catalog.parent_dict[int(halo.quantities["particle_identifier"])] == -1
-add_filter("not_subhalo", _not_subhalo)
+add_filter("not_subhalo", not_subhalo)
-def create_parent_dict(data_source, ptype="halos"):
+def _create_parent_dict(data_source, ptype="halos"):
"""
Create a dictionary of halo parents to allow for filtering of subhalos.
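For context, the renamed callables match how halo filters are registered and
applied through the HaloCatalog. A minimal sketch, not part of this changeset,
with placeholder dataset and catalog paths:

    import yt
    from yt.analysis_modules.halo_analysis.api import HaloCatalog

    data_ds = yt.load("DD0064/DD0064")                     # simulation output (placeholder)
    halos_ds = yt.load("halo_catalogs/catalog_0064.0.h5")  # halo catalog (placeholder)

    hc = HaloCatalog(data_ds=data_ds, halos_ds=halos_ds)
    # "quantity_value" is registered in halo_filters.py; "not_subhalo" is the
    # newly public name from this changeset.
    hc.add_filter("quantity_value", "particle_mass", ">", 1e13, "Msun")
    hc.add_filter("not_subhalo")
    hc.create()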
https://bitbucket.org/yt_analysis/yt/commits/f836c84622ce/
Changeset: f836c84622ce
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-26 13:57:48
Summary: Adding halo_analysis to api docs.
Affected #: 1 file
diff -r 759906f8eb86097163bb6ee3fb21815a14ed5d27 -r f836c84622ce1f0d0c2f1269a6804b613347ab8e doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -279,6 +279,15 @@
.. autosummary::
:toctree: generated/
+ ~yt.frontends.halo_catalogs.halo_catalog.data_structures.HaloCatalogHDF5File
+ ~yt.frontends.halo_catalogs.halo_catalog.data_structures.HaloCatalogDataset
+ ~yt.frontends.halo_catalogs.halo_catalog.fields.HaloCatalogFieldInfo
+ ~yt.frontends.halo_catalogs.halo_catalog.io.IOHandlerHaloCatalogHDF5
+ ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindParticleIndex
+ ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindHDF5File
+ ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindDataset
+ ~yt.frontends.halo_catalogs.owls_subfind.fields.OWLSSubfindFieldInfo
+ ~yt.frontends.halo_catalogs.owls_subfind.io.IOHandlerOWLSSubfindHDF5
~yt.frontends.halo_catalogs.rockstar.data_structures.RockstarBinaryFile
~yt.frontends.halo_catalogs.rockstar.data_structures.RockstarDataset
~yt.frontends.halo_catalogs.rockstar.fields.RockstarFieldInfo
@@ -394,20 +403,45 @@
~yt.data_objects.profiles.Profile3D
~yt.data_objects.profiles.create_profile
-Halo Finding and Particle Functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Halo Analysis
+^^^^^^^^^^^^^
-Halo finding can be executed using these types. Here we list the main halo
-finders as well as a few other supplemental objects.
+The ``HaloCatalog`` object is the primary means for performing custom analysis
+on cosmological halos. It is also the primary interface for halo finding.
-.. rubric:: Halo Finders
+.. autosummary::
+ :toctree: generated/
+
+ ~yt.analysis_modules.halo_analysis.halo_catalog.HaloCatalog
+ ~yt.analysis_modules.halo_analysis.halo_finding_methods.HaloFindingMethod
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.HaloCallback
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.halo_sphere
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.sphere_field_max_recenter
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.sphere_bulk_velocity
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.profile
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.save_profiles
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.load_profiles
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.virial_quantities
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.phase_plot
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.delete_attribute
+ ~yt.analysis_modules.halo_analysis.halo_filters.HaloFilter
+ ~yt.analysis_modules.halo_analysis.halo_filters.quantity_value
+ ~yt.analysis_modules.halo_analysis.halo_filters.not_subhalo
+ ~yt.analysis_modules.halo_analysis.halo_quantities.HaloQuantity
+ ~yt.analysis_modules.halo_analysis.halo_quantities.center_of_mass
+ ~yt.analysis_modules.halo_analysis.halo_quantities.bulk_velocity
+
+Halo Finding
+^^^^^^^^^^^^
+
+These provide direct access to the halo finders. However, it is strongly recommended
+to use the ``HaloCatalog``.
.. autosummary::
:toctree: generated/
~yt.analysis_modules.halo_finding.halo_objects.FOFHaloFinder
~yt.analysis_modules.halo_finding.halo_objects.HOPHaloFinder
- ~yt.analysis_modules.halo_finding.halo_objects.parallelHF
~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
Two Point Functions
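As the new api.rst text says, the HaloCatalog is now also the recommended entry
point for halo finding. A minimal sketch of that workflow, assuming the yt-3.0
halo_analysis module and a placeholder dataset path:

    import yt
    from yt.analysis_modules.halo_analysis.api import HaloCatalog

    ds = yt.load("Enzo_64/RD0006/RedshiftOutput0006")  # placeholder dataset
    hc = HaloCatalog(data_ds=ds, finder_method="hop")  # "fof" is also accepted
    hc.create()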
https://bitbucket.org/yt_analysis/yt/commits/035187f2fd32/
Changeset: 035187f2fd32
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-26 14:17:12
Summary: Removing parallel hop, cleaning up imports, and fixing one yt array.
Affected #: 6 files
diff -r f836c84622ce1f0d0c2f1269a6804b613347ab8e -r 035187f2fd3243b0be04fcc7e6e9997bc9226d4a yt/analysis_modules/halo_finding/api.py
--- a/yt/analysis_modules/halo_finding/api.py
+++ b/yt/analysis_modules/halo_finding/api.py
@@ -16,16 +16,13 @@
from .halo_objects import \
Halo, \
HOPHalo, \
- parallelHOPHalo, \
LoadedHalo, \
FOFHalo, \
HaloList, \
HOPHaloList, \
FOFHaloList, \
- parallelHOPHaloList, \
LoadedHaloList, \
GenericHaloFinder, \
- parallelHF, \
HOPHaloFinder, \
FOFHaloFinder, \
HaloFinder, \
diff -r f836c84622ce1f0d0c2f1269a6804b613347ab8e -r 035187f2fd3243b0be04fcc7e6e9997bc9226d4a yt/analysis_modules/halo_finding/halo_objects.py
--- a/yt/analysis_modules/halo_finding/halo_objects.py
+++ b/yt/analysis_modules/halo_finding/halo_objects.py
@@ -26,12 +26,14 @@
from collections import defaultdict
from yt.extern.six import add_metaclass
-from yt.funcs import *
-
from yt.config import ytcfg
+from yt.funcs import mylog
from yt.utilities.performance_counters import \
- yt_counters, time_function
-from yt.utilities.math_utils import periodic_dist, get_rotation_matrix
+ time_function, \
+ yt_counters
+from yt.utilities.math_utils import \
+ get_rotation_matrix, \
+ periodic_dist
from yt.utilities.physical_constants import \
mass_sun_cgs, \
TINY
@@ -40,11 +42,6 @@
from .hop.EnzoHop import RunHOP
from .fof.EnzoFOF import RunFOF
-try:
- from parallel_hop.parallel_hop_interface import \
- ParallelHOPHaloFinder
-except ImportError:
- mylog.debug("Parallel HOP not imported.")
from yt.utilities.parallel_tools.parallel_analysis_interface import \
ParallelDummy, \
@@ -447,7 +444,8 @@
Msun2g = mass_sun_cgs
rho_crit = rho_crit * ((1.0 + z) ** 3.0)
# Get some pertinent information about the halo.
- self.mass_bins = YTArray(np.zeros(self.bin_count + 1, dtype='float64'),'Msun')
+ self.mass_bins = self.ds.arr(np.zeros(self.bin_count + 1,
+ dtype='float64'),'Msun')
dist = np.empty(thissize, dtype='float64')
cen = self.center_of_mass()
mark = 0
@@ -694,113 +692,6 @@
pass
-class parallelHOPHalo(Halo, ParallelAnalysisInterface):
- dont_wrap = ["maximum_density", "maximum_density_location",
- "center_of_mass", "total_mass", "bulk_velocity", "maximum_radius",
- "get_size", "get_sphere", "write_particle_list", "__getitem__",
- "virial_info", "virial_bin", "virial_mass", "virial_radius",
- "rms_velocity"]
-
- def virial_info(self, bins=300):
- r"""Calculates the virial information for the halo. Generally, it is
- better to call virial_radius or virial_mass instead, which calls this
- function automatically.
- """
- # Skip if we've already calculated for this number of bins.
- if self.bin_count == bins and self.overdensity is not None:
- return None
- # Do this for all because all will use it.
- self.bin_count = bins
- period = self.data.ds.domain_right_edge - \
- self.data.ds.domain_left_edge
- self.mass_bins = np.zeros(self.bin_count + 1, dtype='float64')
- cen = self.center_of_mass()
- # Cosmology
- h = self.data.ds.hubble_constant
- Om_matter = self.data.ds.omega_matter
- z = self.data.ds.current_redshift
- rho_crit = rho_crit_g_cm3_h2 * h ** 2.0 * Om_matter # g cm^-3
- Msun2g = mass_sun_cgs
- rho_crit = rho_crit * ((1.0 + z) ** 3.0)
- # If I own some of this halo operate on the particles.
- if self.indices is not None:
- # Get some pertinent information about the halo.
- dist = np.empty(self.indices.size, dtype='float64')
- mark = 0
- # Find the distances to the particles.
- # I don't like this much, but I
- # can't see a way to eliminate a loop like this, either here or in
- # yt.math_utils.
- for pos in itertools.izip(self["particle_position_x"],
- self["particle_position_y"], self["particle_position_z"]):
- dist[mark] = periodic_dist(cen, pos, period)
- mark += 1
- dist_min, dist_max = min(dist), max(dist)
- # If I don't have this halo, make some dummy values.
- else:
- dist_min = max(period)
- dist_max = 0.0
- # In this parallel case, we're going to find the global dist extrema
- # and built identical bins on all tasks.
- dist_min = self.comm.mpi_allreduce(dist_min, op='min')
- dist_max = self.comm.mpi_allreduce(dist_max, op='max')
- # Set up the radial bins.
- # Multiply min and max to prevent issues with digitize below.
- self.radial_bins = np.logspace(math.log10(dist_min * .99 + TINY),
- math.log10(dist_max * 1.01 + 2 * TINY), num=self.bin_count + 1)
- self.radial_bins = ds.arr(self.radial_bins, 'code_length')
-
- if self.indices is not None and self.indices.size > 1:
- # Find out which bin each particle goes into, and add the particle
- # mass to that bin.
- inds = np.digitize(dist, self.radial_bins) - 1
- for index in np.unique(inds):
- self.mass_bins[index] += \
- np.sum(self["particle_mass"][inds == index]).in_units('Msun')
- # Now forward sum the masses in the bins.
- for i in xrange(self.bin_count):
- self.mass_bins[i + 1] += self.mass_bins[i]
- # Sum up the mass_bins globally
- self.mass_bins = self.comm.mpi_allreduce(self.mass_bins, op='sum')
- # Calculate the over densities in the bins.
- self.overdensity = self.mass_bins * Msun2g / \
- (4. / 3. * math.pi * rho_crit * \
- (self.radial_bins) ** 3.0)
-
- def _get_ellipsoid_parameters_basic(self):
- mylog.error("Ellipsoid calculation does not work for parallelHF halos." + \
- " Please save the halos using .dump(), and reload them using" + \
- " LoadHaloes() to use this function.")
- return None
-
- def get_ellipsoid_parameters(self):
- r"""Calculate the parameters that describe the ellipsoid of
- the particles that constitute the halo.
-
- Parameters
- ----------
- None
-
- Returns
- -------
- tuple : (cm, mag_A, mag_B, mag_C, e0_vector, tilt)
- The 6-tuple has in order:
- #. The center of mass as an array.
- #. mag_A as a float.
- #. mag_B as a float.
- #. mag_C as a float.
- #. e0_vector as an array.
- #. tilt as a float.
-
- Examples
- --------
- >>> params = halos[0].get_ellipsoid_parameters()
- """
- mylog.error("get_ellipsoid_parameters does not work for parallelHF halos." + \
- " Please save the halos using .dump(), and reload them using" + \
- " LoadHaloes() to use this function.")
- return None
-
class FOFHalo(Halo):
def maximum_density(self):
@@ -1565,254 +1456,6 @@
CoM = cen, max_radius = r, supp = temp_dict))
halo += 1
-
-class parallelHOPHaloList(HaloList, ParallelAnalysisInterface):
- """
- Run hop on *data_source* with a given density *threshold*. If
- *dm_only* is True (default), only run it on the dark matter particles, otherwise
- on all particles. Returns an iterable collection of *HopGroup* items.
- """
- _name = "parallelHOP"
- _halo_class = parallelHOPHalo
- _fields = ["particle_position_%s" % ax for ax in 'xyz'] + \
- ["particle_mass", "particle_index"]
-
- def __init__(self, data_source, padding, num_neighbors, bounds, total_mass,
- period, threshold=160.0, dm_only=True, rearrange=True, premerge=True,
- tree='F'):
- ParallelAnalysisInterface.__init__(self)
- self.threshold = threshold
- self.num_neighbors = num_neighbors
- self.bounds = bounds
- self.total_mass = total_mass
- self.rearrange = rearrange
- self.period = period
- self.old_period = period.copy()
- self.period = np.array([1.] * 3)
- self._data_source = data_source
- self.premerge = premerge
- self.tree = tree
- mylog.info("Initializing HOP")
- HaloList.__init__(self, data_source, dm_only)
-
- def _run_finder(self):
- yt_counters("Reading Data")
- # Test to make sure the particle IDs aren't suspicious.
- exit = False
- if (self.particle_fields["particle_index"] < 0).any():
- mylog.error("Negative values in particle_index field. Parallel HOP will fail.")
- exit = True
- if np.unique(self.particle_fields["particle_index"]).size != \
- self.particle_fields["particle_index"].size:
- mylog.error("Non-unique values in particle_index field. Parallel HOP will fail.")
- exit = True
-
- self.comm.mpi_exit_test(exit)
- # Try to do this in a memory conservative way.
- np.divide(self.particle_fields['particle_mass'].in_units('Msun'), self.total_mass,
- self.particle_fields['particle_mass'])
- np.divide(self.particle_fields["particle_position_x"],
- self.old_period[0], self.particle_fields["particle_position_x"])
- np.divide(self.particle_fields["particle_position_y"],
- self.old_period[1], self.particle_fields["particle_position_y"])
- np.divide(self.particle_fields["particle_position_z"],
- self.old_period[2], self.particle_fields["particle_position_z"])
- obj = ParallelHOPHaloFinder(self.period, self.padding,
- self.num_neighbors, self.bounds,
- self.particle_fields,
- self.threshold, rearrange=self.rearrange, premerge=self.premerge,
- tree=self.tree)
- self.densities, self.tags = obj.density, obj.chainID
- # I'm going to go ahead and delete self.densities because it's not
- # actually being used. I'm not going to remove it altogether because
- # it may be useful to someone someday.
- del self.densities
- self.group_count = obj.group_count
- self.group_sizes = obj.group_sizes
- if self.group_count == 0:
- mylog.info("There are no halos found.")
- return
- self.CoM = obj.CoM
- self.Tot_M = obj.Tot_M * self.total_mass
- self.max_dens_point = obj.max_dens_point
- self.max_radius = obj.max_radius
- for dd in range(3):
- self.CoM[:, dd] *= self.old_period[dd]
- self.max_dens_point[:, dd + 1] *= self.old_period[dd]
- # This is wrong, below, with uneven boundaries. We'll cross that bridge
- # when we get there.
- self.max_radius *= self.old_period[0]
- self.period = self.old_period.copy()
- # Precompute the bulk velocity in parallel.
- yt_counters("Precomp bulk vel.")
- self.bulk_vel = np.zeros((self.group_count, 3), dtype='float64')
- yt_counters("bulk vel. reading data")
- pm = obj.mass
- # Fix this back to un-normalized units.
- np.multiply(pm, self.total_mass, pm)
- xv = self._data_source["particle_velocity_x"][self._base_indices]
- yv = self._data_source["particle_velocity_y"][self._base_indices]
- zv = self._data_source["particle_velocity_z"][self._base_indices]
- yt_counters("bulk vel. reading data")
- yt_counters("bulk vel. computing")
- select = (self.tags >= 0)
- calc = len(np.where(select == True)[0])
- if calc:
- vel = np.empty((calc, 3), dtype='float64')
- ms = pm[select]
- vel[:, 0] = xv[select] * ms
- vel[:, 1] = yv[select] * ms
- vel[:, 2] = zv[select] * ms
- subchain = self.tags[select]
- sort = subchain.argsort()
- vel = vel[sort]
- sort_subchain = subchain[sort]
- uniq_subchain = np.unique(sort_subchain)
- diff_subchain = np.ediff1d(sort_subchain)
- marks = (diff_subchain > 0)
- marks = np.arange(calc)[marks] + 1
- marks = np.concatenate(([0], marks, [calc]))
- for i, u in enumerate(uniq_subchain):
- self.bulk_vel[u] = np.sum(vel[marks[i]:marks[i + 1]], axis=0)
- del vel, subchain, sort_subchain
- del diff_subchain
- # Bring it together, and divide by the previously computed total mass
- # of each halo.
- self.bulk_vel = self.comm.mpi_allreduce(self.bulk_vel, op='sum')
- for groupID in xrange(self.group_count):
- self.bulk_vel[groupID] = \
- self.bulk_vel[groupID] / self.Tot_M[groupID]
- yt_counters("bulk vel. computing")
- # Now calculate the RMS velocity of the groups in parallel, very
- # similarly to the bulk velocity and re-using some of the arrays.
- yt_counters("rms vel computing")
- rms_vel_temp = np.zeros((self.group_count, 2), dtype='float64')
- if calc:
- vel = np.empty((calc, 3), dtype='float64')
- vel[:, 0] = xv[select] * ms
- vel[:, 1] = yv[select] * ms
- vel[:, 2] = zv[select] * ms
- vel = vel[sort]
- for i, u in enumerate(uniq_subchain):
- # This finds the sum locally.
- rms_vel_temp[u][0] = np.sum(((vel[marks[i]:marks[i + 1]] - \
- self.bulk_vel[u]) / self.Tot_M[u]) ** 2.)
- # I could use self.group_sizes...
- rms_vel_temp[u][1] = marks[i + 1] - marks[i]
- del vel, marks, uniq_subchain
- # Bring it together.
- rms_vel_temp = self.comm.mpi_allreduce(rms_vel_temp, op='sum')
- self.rms_vel = np.empty(self.group_count, dtype='float64')
- for groupID in xrange(self.group_count):
- # Here we do the Mean and the Root.
- self.rms_vel[groupID] = \
- np.sqrt(rms_vel_temp[groupID][0] / rms_vel_temp[groupID][1]) * \
- self.group_sizes[groupID]
- del rms_vel_temp
- yt_counters("rms vel computing")
- self.taskID = obj.mine
- self.halo_taskmap = obj.halo_taskmap # A defaultdict.
- del obj
- gc.collect()
- yt_counters("Precomp bulk vel.")
-
- def _parse_output(self):
- yt_counters("Final Grouping")
- """
- Each task will make an entry for all groups, but it may be empty.
- """
- unique_ids = np.unique(self.tags)
- counts = np.bincount((self.tags + 1).tolist())
- sort_indices = np.argsort(self.tags)
- grab_indices = np.indices(self.tags.shape).ravel()[sort_indices]
- del sort_indices
- cp = 0
- index = 0
- # We want arrays for parallel HOP
- self._groups = np.empty(self.group_count, dtype='object')
- self._max_dens = np.empty((self.group_count, 4), dtype='float64')
- if self.group_count == 0:
- mylog.info("There are no halos found.")
- return
- for i in unique_ids:
- if i == -1:
- cp += counts[i + 1]
- continue
- # If there is a gap in the unique_ids, make empty groups to
- # fill it in.
- while index < i:
- self._groups[index] = self._halo_class(self, index, \
- size=self.group_sizes[index], CoM=self.CoM[index], \
- max_dens_point=self.max_dens_point[index], \
- group_total_mass=self.Tot_M[index],
- max_radius=self.max_radius[index],
- bulk_vel=self.bulk_vel[index],
- tasks=self.halo_taskmap[index],
- rms_vel=self.rms_vel[index])
- # I don't own this halo
- self.comm.do_not_claim_object(self._groups[index])
- self._max_dens[index] = [self.max_dens_point[index][0],
- self.max_dens_point[index][1], \
- self.max_dens_point[index][2],
- self.max_dens_point[index][3]]
- index += 1
- cp_c = cp + counts[i + 1]
- group_indices = grab_indices[cp:cp_c]
- self._groups[index] = self._halo_class(self, i, group_indices, \
- size=self.group_sizes[i], CoM=self.CoM[i], \
- max_dens_point=self.max_dens_point[i], \
- group_total_mass=self.Tot_M[i], max_radius=self.max_radius[i],
- bulk_vel=self.bulk_vel[i], tasks=self.halo_taskmap[index],
- rms_vel=self.rms_vel[i])
- # This halo may be owned by many, including this task
- self.comm.claim_object(self._groups[index])
- self._max_dens[index] = [self.max_dens_point[i][0],
- self.max_dens_point[i][1], \
- self.max_dens_point[i][2], self.max_dens_point[i][3]]
- cp += counts[i + 1]
- index += 1
- # If there are missing groups at the end, add them.
- while index < self.group_count:
- self._groups[index] = self._halo_class(self, index, \
- size=self.group_sizes[index], CoM=self.CoM[index], \
- max_dens_point=self.max_dens_point[index], \
- group_total_mass=self.Tot_M[index],
- max_radius=self.max_radius[index],
- bulk_vel=self.bulk_vel[index], tasks=self.halo_taskmap[index],
- rms_vel=self.rms_vel[index])
- self.comm.do_not_claim_object(self._groups[index])
- self._max_dens[index] = [self.max_dens_point[index][0],
- self.max_dens_point[index][1], \
- self.max_dens_point[index][2], self.max_dens_point[index][3]]
- index += 1
- # Clean up
- del self.max_dens_point, self.max_radius, self.bulk_vel
- del self.halo_taskmap, self.tags, self.rms_vel
- del grab_indices, unique_ids, counts
- try:
- del group_indices
- except UnboundLocalError:
- pass
-
- def __len__(self):
- return self.group_count
-
- def write_out(self, filename="parallelHopAnalysis.out", ellipsoid_data=False):
- r"""Write out standard halo information to a text file.
-
- Parameters
- ----------
- filename : String
- The name of the file to write to.
- Default = "parallelHopAnalysis.out".
-
- Examples
- --------
- >>> halos.write_out("parallelHopAnalysis.out")
- """
- HaloList.write_out(self, filename, ellipsoid_data)
-
-
class GenericHaloFinder(HaloList, ParallelAnalysisInterface):
def __init__(self, ds, data_source, dm_only=True, padding=0.0):
ParallelAnalysisInterface.__init__(self)
@@ -2007,334 +1650,6 @@
self.write_particle_lists(basename)
self.write_particle_lists_txt(basename)
-
-class parallelHF(GenericHaloFinder, parallelHOPHaloList):
- r"""Parallel HOP halo finder.
-
- Halos are built by:
- 1. Calculating a density for each particle based on a smoothing kernel.
- 2. Recursively linking particles to other particles from lower density
- particles to higher.
- 3. Geometrically proximate chains are identified and
- 4. merged into final halos following merging rules.
-
- Lower thresholds generally produce more halos, and the largest halos
- become larger. Also, halos become more filamentary and over-connected.
-
- This is very similar to HOP, but it does not produce precisely the
- same halos due to unavoidable numerical differences.
-
- Skory et al. "Parallel HOP: A Scalable Halo Finder for Massive
- Cosmological Data Sets." arXiv (2010) 1001.3411
-
- Parameters
- ----------
- ds : `Dataset`
- The dataset on which halo finding will be conducted.
- threshold : float
- The density threshold used when building halos. Default = 160.0.
- dm_only : bool
- If True, only dark matter particles are used when building halos.
- Default = True.
- resize : bool
- Turns load-balancing on or off. Default = True.
- kdtree : string
- Chooses which kD Tree to use. The Fortran one (kdtree = 'F') is
- faster, but uses more memory. The Cython one (kdtree = 'C') is
- slower but is more memory efficient.
- Default = 'F'
- rearrange : bool
- Turns on faster nearest neighbor searches at the cost of increased
- memory usage.
- This option only applies when using the Fortran tree.
- Default = True.
- fancy_padding : bool
- True calculates padding independently for each face of each
- subvolume. Default = True.
- safety : float
- Due to variances in inter-particle spacing in the volume, the
- padding may need to be increased above the raw calculation.
- This number is multiplied to the calculated padding, and values
- >1 increase the padding. Default = 1.5.
- premerge : bool
- True merges chains in two steps (rather than one with False), which
- can speed up halo finding by 25% or more. However, True can result
- in small (<<1%) variations in the final halo masses when compared
- to False. Default = True.
- sample : float
- The fraction of the full dataset on which load-balancing is
- performed. Default = 0.03.
- total_mass : float
- If HOP is run on the same dataset multiple times, the total mass
- of particles in Msun units in the full volume can be supplied here
- to save time.
- This must correspond to the particles being operated on, meaning
- if stars are included in the halo finding, they must be included
- in this mass as well, and vice versa.
- If halo finding on a subvolume, this still corresponds with the
- mass in the entire volume.
- Default = None, which means the total mass is automatically
- calculated.
- num_particles : integer
- The total number of particles in the volume, in the same fashion
- as `total_mass` is calculated. Specifying this turns off
- fancy_padding.
- Default = None, which means the number of particles is
- automatically calculated.
-
- Examples
- -------
- >>> ds = load("RedshiftOutput0000")
- >>> halos = parallelHF(ds)
- """
- def __init__(self, ds, subvolume=None, threshold=160, dm_only=True, \
- resize=True, rearrange=True,\
- fancy_padding=True, safety=1.5, premerge=True, sample=0.03, \
- total_mass=None, num_particles=None, tree='F'):
- if subvolume is not None:
- ds_LE = np.array(subvolume.left_edge)
- ds_RE = np.array(subvolume.right_edge)
- self._data_source = ds.all_data()
- GenericHaloFinder.__init__(self, ds, self._data_source, dm_only,
- padding=0.0)
- self.padding = 0.0
- self.num_neighbors = 65
- self.safety = safety
- self.sample = sample
- self.tree = tree
- if self.tree != 'F' and self.tree != 'C':
- mylog.error("No kD Tree specified!")
- period = ds.domain_right_edge - ds.domain_left_edge
- topbounds = np.array([[0., 0., 0.], period])
- # Cut up the volume evenly initially, with no padding.
- padded, LE, RE, self._data_source = \
- self.partition_index_3d(ds=self._data_source,
- padding=self.padding)
- # also get the total mass of particles
- yt_counters("Reading Data")
- # Adaptive subregions by bisection. We do not load balance if we are
- # analyzing a subvolume.
- ds_names = ["particle_position_x", "particle_position_y",
- "particle_position_z"]
- if ytcfg.getboolean("yt", "inline") == False and \
- resize and self.comm.size != 1 and subvolume is None:
- random.seed(self.comm.rank)
- cut_list = self.partition_index_3d_bisection_list()
- root_points = self._subsample_points()
- self.bucket_bounds = []
- if self.comm.rank == 0:
- self._recursive_divide(root_points, topbounds, 0, cut_list)
- self.bucket_bounds = \
- self.comm.mpi_bcast(self.bucket_bounds)
- my_bounds = self.bucket_bounds[self.comm.rank]
- LE, RE = my_bounds[0], my_bounds[1]
- self._data_source = self.ds.region([0.] * 3, LE, RE)
- # If this isn't parallel, define the region as an AMRRegionStrict so
- # particle IO works.
- if self.comm.size == 1:
- self._data_source = self.ds.region([0.5] * 3,
- LE, RE)
- # get the average spacing between particles for this region
- # The except is for the serial case where the full box is what we want.
- if num_particles is None:
- data = self._data_source["particle_position_x"]
- try:
- l = self._data_source.right_edge - self._data_source.left_edge
- except AttributeError:
- l = ds.domain_right_edge - ds.domain_left_edge
- vol = l[0] * l[1] * l[2]
- full_vol = vol
- # We will use symmetric padding when a subvolume is being used.
- if not fancy_padding or subvolume is not None or \
- num_particles is not None:
- if num_particles is None:
- num_particles = data.size
- avg_spacing = (float(vol) / num_particles) ** (1. / 3.)
- # padding is a function of inter-particle spacing, this is an
- # approximation, but it's OK with the safety factor
- padding = (self.num_neighbors) ** (1. / 3.) * self.safety * \
- avg_spacing
- self.padding = (np.ones(3, dtype='float64') * padding,
- np.ones(3, dtype='float64') * padding)
- mylog.info('padding %s avg_spacing %f vol %f local_parts %d' % \
- (str(self.padding), avg_spacing, vol, num_particles))
- # Another approach to padding, perhaps more accurate.
- elif fancy_padding and self._distributed:
- LE_padding = np.empty(3, dtype='float64')
- RE_padding = np.empty(3, dtype='float64')
- avg_spacing = (vol / data.size) ** (1. / 3.)
- base_padding = (self.num_neighbors) ** (1. / 3.) * self.safety * \
- avg_spacing
- for dim in xrange(3):
- if ytcfg.getboolean("yt", "inline") == False:
- data = self._data_source[ds_names[dim]]
- else:
- data = self._data_source[ds_names[dim]]
- num_bins = 1000
- width = self._data_source.right_edge[dim] - \
- self._data_source.left_edge[dim]
- area = (self._data_source.right_edge[(dim + 1) % 3] - \
- self._data_source.left_edge[(dim + 1) % 3]) * \
- (self._data_source.right_edge[(dim + 2) % 3] - \
- self._data_source.left_edge[(dim + 2) % 3])
- bin_width = base_padding
- num_bins = int(math.ceil(width / bin_width))
- bins = np.arange(num_bins + 1, dtype='float64') * bin_width + \
- self._data_source.left_edge[dim]
- counts, bins = np.histogram(data, bins)
- # left side.
- start = 0
- count = counts[0]
- while count < self.num_neighbors:
- start += 1
- count += counts[start]
- # Get the avg spacing in just this boundary.
- vol = area * (bins[start + 1] - bins[0])
- avg_spacing = (float(vol) / count) ** (1. / 3.)
- LE_padding[dim] = (self.num_neighbors) ** (1. / 3.) * \
- self.safety * avg_spacing
- # right side.
- start = -1
- count = counts[-1]
- while count < self.num_neighbors:
- start -= 1
- count += counts[start]
- vol = area * (bins[-1] - bins[start - 1])
- avg_spacing = (float(vol) / count) ** (1. / 3.)
- RE_padding[dim] = (self.num_neighbors) ** (1. / 3.) * \
- self.safety * avg_spacing
- self.padding = (LE_padding, RE_padding)
- del bins, counts
- mylog.info('fancy_padding %s avg_spacing %f full_vol %f local_parts %d %s' % \
- (str(self.padding), avg_spacing, full_vol,
- data.size, str(self._data_source)))
- # Now we get the full box mass after we have the final composition of
- # subvolumes.
- if total_mass is None:
- total_mass = self.comm.mpi_allreduce((self._data_source["particle_mass"].in_units('Msun').astype('float64')).sum(),
- op='sum')
- if not self._distributed:
- self.padding = (np.zeros(3, dtype='float64'),
- np.zeros(3, dtype='float64'))
- # If we're using a subvolume, we now re-divide.
- if subvolume is not None:
- self._data_source = ds.region([0.] * 3, ds_LE, ds_RE)
- # Cut up the volume.
- padded, LE, RE, self._data_source = \
- self.partition_index_3d(ds=self._data_source,
- padding=0.)
- self.bounds = (LE, RE)
- (LE_padding, RE_padding) = self.padding
- parallelHOPHaloList.__init__(self, self._data_source, self.padding, \
- self.num_neighbors, self.bounds, total_mass, period, \
- threshold=threshold, dm_only=dm_only, rearrange=rearrange,
- premerge=premerge, tree=self.tree)
- self._join_halolists()
- yt_counters("Final Grouping")
-
- def _subsample_points(self):
- # Read in a random subset of the points in each domain, and then
- # collect them on the root task.
- xp = self._data_source["particle_position_x"]
- n_parts = self.comm.mpi_allreduce(xp.size, op='sum')
- local_parts = xp.size
- random_points = int(self.sample * n_parts)
- # We want to get a representative selection of random particles in
- # each subvolume.
- adjust = float(local_parts) / (float(n_parts) / self.comm.size)
- n_random = int(adjust * float(random_points) / self.comm.size)
- mylog.info("Reading in %d random particles." % n_random)
- # Get unique random particles.
- my_points = np.empty((n_random, 3), dtype='float64')
- uni = np.array(random.sample(xrange(xp.size), n_random))
- uni = uni[uni.argsort()]
- my_points[:, 0] = xp[uni]
- del xp
- self._data_source.clear_data()
- my_points[:, 1] = self._data_source["particle_position_y"][uni]
- self._data_source.clear_data()
- my_points[:, 2] = self._data_source["particle_position_z"][uni]
- self._data_source.clear_data()
- del uni
- # Collect them on the root task.
- mine, sizes = self.comm.mpi_info_dict(n_random)
- if mine == 0:
- tot_random = sum(sizes.values())
- root_points = np.empty((tot_random, 3), dtype='float64')
- root_points.shape = (1, 3 * tot_random)
- else:
- root_points = np.empty([])
- my_points.shape = (1, n_random * 3)
- root_points = self.comm.par_combine_object(my_points[0],
- datatype="array", op="cat")
- del my_points
- if mine == 0:
- root_points.shape = (tot_random, 3)
- return root_points
-
- def _recursive_divide(self, points, bounds, level, cut_list):
- dim = cut_list[level][0]
- parts = points.shape[0]
- num_bins = 1000
- width = bounds[1][dim] - bounds[0][dim]
- bin_width = width / num_bins
- bins = np.arange(num_bins + 1, dtype='float64') * bin_width + \
- bounds[0][dim]
- counts, bins = np.histogram(points[:, dim], bins)
- # Find the bin that passes the cut points.
- midpoints = [bounds[0][dim]]
- sum = 0
- bin = 0
- for step in xrange(1, cut_list[level][1]):
- while sum < ((parts * step) / cut_list[level][1]):
- lastsum = sum
- sum += counts[bin]
- bin += 1
- # Bin edges
- left_edge = bins[bin - 1]
- right_edge = bins[bin]
- # Find a better approx of the midpoint cut
- # line using a linear approx.
- a = float(sum - lastsum) / (right_edge - left_edge)
- midpoints.append(left_edge + (0.5 - \
- (float(lastsum) / parts / 2)) / a)
- midpoints.append(bounds[1][dim])
-
- # Split the points & update the bounds.
- subpoints = []
- subbounds = []
- for pair in zip(midpoints[:-1], midpoints[1:]):
- select = np.bitwise_and(points[:, dim] >= pair[0],
- points[:, dim] < pair[1])
- subpoints.append(points[select])
- nb = bounds.copy()
- nb[0][dim] = pair[0]
- nb[1][dim] = pair[1]
- subbounds.append(nb)
- # If we're at the maxlevel, make a bucket. Otherwise, recurse down.
- maxlevel = len(cut_list) - 1
- for pair in zip(subpoints, subbounds):
- if level == maxlevel:
- self.bucket_bounds.append(pair[1])
- else:
- self._recursive_divide(pair[0], pair[1], level + 1, cut_list)
-
- def _join_halolists(self):
- if self.group_count == 0:
- mylog.info("There are no halos found.")
- return
- ms = -self.Tot_M.copy()
- del self.Tot_M
- Cx = self.CoM[:, 0].copy()
- sorted = np.lexsort([Cx, ms])
- del Cx, ms
- self._groups = self._groups[sorted]
- self._max_dens = self._max_dens[sorted]
- for i in xrange(self.group_count):
- self._groups[i].id = i
- del sorted, self.group_sizes, self.CoM
-
-
class HOPHaloFinder(GenericHaloFinder, HOPHaloList):
r"""HOP halo finder.
This diff is so big that we needed to truncate the remainder.
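With parallelHF removed, the serial HOP and FOF finders are still exported from
halo_finding.api for anyone not yet on the HaloCatalog. A minimal sketch,
reusing the dataset name from the old docstring example:

    import yt
    from yt.analysis_modules.halo_finding.api import HOPHaloFinder

    ds = yt.load("RedshiftOutput0000")
    halos = HOPHaloFinder(ds, threshold=160.0)
    halos.write_out("HopAnalysis.out")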
https://bitbucket.org/yt_analysis/yt/commits/cf5e6f52f838/
Changeset: cf5e6f52f838
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-27 11:01:35
Summary: Moving remaining merger tree code to its new home.
Affected #: 2 files
diff -r 035187f2fd3243b0be04fcc7e6e9997bc9226d4a -r cf5e6f52f838ac2ef6d312cbfe546bc975b6112a yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
--- /dev/null
+++ b/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
@@ -0,0 +1,801 @@
+"""
+A very simple, purely-serial, merger tree script that knows how to parse FOF
+catalogs, either output by Enzo or output by yt's FOF halo finder, and then
+compare parent/child relationships.
+
+
+
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (c) 2013, yt Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+# plot_halo_evolution() gives a good full example of how to use the framework
+
+# First pass at a simplified merger tree
+#
+# Basic outline:
+#
+# 1. Halo find inline, obtaining particle catalogs
+# 2. Load dataset at time t
+# 3. Load dataset at time t+1
+# 4. Parse catalogs for t and t+1
+# 5. Place halos for t+1 in kD-tree
+# 6. For every halo in t, execute ball-query with some linking length
+# 7. For every halo in ball-query result, execute numpy's intersect1d on
+# particle IDs
+# 8. Parentage is described by a fraction of particles that pass from one to
+# the other; we have both descendant fractions and ancestry fractions.
+
+
+import numpy as np
+import h5py
+import time
+import pdb
+import cPickle
+import glob
+
+from yt.funcs import *
+from yt.extern.pykdtree import KDTree
+import yt.extern.pydot as pydot
+
+# We don't currently use this, but we may again find a use for it in the
+# future.
+class MaxLengthDict(dict):
+ def __init__(self, *args, **kwargs):
+ dict.__init__(self, *args, **kwargs)
+ self.order = [None] * 50
+
+ def __setitem__(self, key, val):
+ if key not in self.order:
+ to_remove = self.order.pop(0)
+ self.pop(to_remove, None)
+ self.order.append(key)
+ dict.__setitem__(self, key, val)
+
+ def __getitem__(self, key):
+ if key in self.order:
+ self.order.pop(self.order.index(key))
+ self.order.append(key)
+ return dict.__getitem__(self, key)
+
+ def __delitem__(self, key):
+ dict.__delitem__(self, key)
+ self.order.pop(self.order.index(key))
+ self.order.insert(0, None)
+
+class HaloCatalog(object):
+ r"""A catalog of halos, parsed from EnzoFOF outputs.
+
+ This class will read in catalogs output by the Enzo FOF halo finder and
+ make available their positions, radii, etc. Enzo FOF was provided
+ starting with 2.0, and can be run either inline (with the correct
+ options) or as a postprocessing step using the `-F` command line
+ option. This class is mostly useful when calculating a merger tree,
+ and when the particle IDs for members of a given halo are output as
+ well.
+
+ Parameters
+ ----------
+ output_id : int
+ This is the integer output id of the halo catalog to parse and
+ load.
+ cache : bool
+ Should we store, in between accesses, the particle IDs? If set to
+ true, the correct particle files must exist.
+ external_FOF : bool, optional
+ Are we building a tree from outputs generated by an
+ external FOF program, or an FOF internal to yt?
+ FOF_directory : str, optional
+ Directory where FOF files are located
+ """
+ cache = None
+ def __init__(self, output_id, cache = True, external_FOF=True, FOF_directory="FOF"):
+ self.output_id = output_id
+ self.external_FOF = external_FOF
+ self.redshift = 0.0
+ self.FOF_directory = FOF_directory
+ self.particle_file = h5py.File("%s/particles_%05i.h5" % \
+ (FOF_directory, output_id), "r")
+ if self.external_FOF:
+ self.parse_halo_catalog_external()
+ else:
+ self.parse_halo_catalog_internal()
+ if cache: self.cache = dict()#MaxLengthDict()
+
+ def __del__(self):
+ self.particle_file.close()
+
+ def parse_halo_catalog_external(self):
+ hp = []
+ for line in open("%s/groups_%05i.dat" % \
+ (self.FOF_directory, self.output_id)):
+ if line.strip() == "": continue # empty
+ if line.startswith("# Red"):
+ self.redshift = float(line.split("=")[1])
+ if line[0] == "#": continue # comment
+ if line[0] == "d": continue # datavar
+ x,y,z = [float(f) for f in line.split(None, 3)[:-1]]
+ hp.append([x,y,z])
+ if hp != []:
+ self.halo_positions = np.array(hp)
+ self.halo_kdtree = KDTree(self.halo_positions)
+ else:
+ self.halo_positions = None
+ self.halo_kdtree = None
+ return hp
+
+ def parse_halo_catalog_internal(self):
+ """
+ This parser works on the files output directly out of yt's internal
+ halo_finder. The parse_halo_catalog_external works with an
+ external version of FOF.
+
+ Examples
+ --------
+ ds = load("DD0000/DD0000")
+ halo_list = FOFHaloFinder(ds)
+ halo_list.write_out("FOF/groups_00000.txt")
+ halos_COM = parse_halo_catalog_internal()
+ """
+ hp = []
+ for line in open("%s/groups_%05i.txt" % \
+ (self.FOF_directory, self.output_id)):
+ if line.startswith("# RED"):
+ self.redshift = float(line.split("=")[1])
+ continue
+ if line.strip() == "": continue # empty
+ if line[0] == "#": continue # comment
+ x,y,z = [float(f) for f in line.split()[7:10]] # COM x,y,z
+ hp.append([x,y,z])
+ if hp != []:
+ self.halo_positions = np.array(hp)
+ self.halo_kdtree = KDTree(self.halo_positions)
+ else:
+ self.halo_positions = None
+ self.halo_kdtree = None
+ return hp
+
+ def read_particle_ids(self, halo_id):
+ if self.cache is not None:
+ if halo_id not in self.cache:
+ if self.external_FOF:
+ self.cache[halo_id] = \
+ self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
+ else:
+ self.cache[halo_id] = \
+ self.particle_file["/Halo%08i/particle_index" % halo_id][:]
+ ids = self.cache[halo_id]
+ else:
+ if self.external_FOF:
+ ids = self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
+ else:
+ ids = self.particle_file["/Halo%08i/particle_index" % halo_id][:]
+ return HaloParticleList(halo_id, self.halo_positions[halo_id,:], ids)
+
+ def calculate_parentage_fractions(self, other_catalog, radius = 0.10):
+ parentage_fractions = {}
+ if self.halo_positions == None or other_catalog.halo_positions == None:
+ return parentage_fractions
+ mylog.debug("Ball-tree query with radius %0.3e", radius)
+ all_nearest = self.halo_kdtree.query_ball_tree(
+ other_catalog.halo_kdtree, radius)
+ pbar = get_pbar("Halo Mergers", self.halo_positions.shape[0])
+ for hid1, nearest in enumerate(all_nearest):
+ pbar.update(hid1)
+ parentage_fractions[hid1] = {}
+ HPL1 = self.read_particle_ids(hid1)
+ for hid2 in sorted(nearest):
+ HPL2 = other_catalog.read_particle_ids(hid2)
+ p1, p2 = HPL1.find_relative_parentage(HPL2)
+ parentage_fractions[hid1][hid2] = (p1, p2, HPL2.number_of_particles)
+ parentage_fractions[hid1]["NumberOfParticles"] = HPL1.number_of_particles
+ pbar.finish()
+ return parentage_fractions
+
+class HaloParticleList(object):
+ def __init__(self, halo_id, position, particle_ids):
+ self.halo_id = halo_id
+ self.position = np.array(position)
+ self.particle_ids = particle_ids
+ self.number_of_particles = particle_ids.size
+
+ def find_nearest(self, other_tree, radius = 0.10):
+ return other_tree.query_ball_point(self.position, radius)
+
+ def find_relative_parentage(self, child):
+ # Return two values: percent this halo gave to the other, and percent
+ # of the other that comes from this halo
+ overlap = np.intersect1d(self.particle_ids, child.particle_ids).size
+ of_child_from_me = float(overlap)/child.particle_ids.size
+ of_mine_from_me = float(overlap)/self.particle_ids.size
+ return of_child_from_me, of_mine_from_me
+
+class EnzoFOFMergerBranch(object):
+ def __init__(self, tree, output_num, halo_id, max_children,
+ min_relation=0.25):
+ self.output_num = output_num
+ self.halo_id = halo_id
+ self.npart = tree.relationships[output_num][halo_id]["NumberOfParticles"]
+ self.children = []
+ self.progenitor = -1
+ max_relationship = 0.0
+ halo_count = 0
+ sorted_keys = sorted(tree.relationships[output_num][halo_id].keys())
+ for k in sorted_keys:
+ if not str(k).isdigit(): continue
+ v = tree.relationships[output_num][halo_id][k]
+ if v[1] > min_relation and halo_count < max_children:
+ halo_count += 1
+ self.children.append((k,v[1],v[2]))
+ if v[1] > max_relationship:
+ self.progenitor = k
+ max_relationship = v[1]
+
+class EnzoFOFMergerTree(object):
+ r"""Calculates the parentage relationships for halos for a series of
+ outputs, using the framework provided in enzofof_merger_tree.
+
+ Parameters
+ ----------
+ zrange : tuple
+ This is the redshift range (min, max) to calculate the
+ merger tree. E.g. (0, 2) for z=2 to z=0
+ cycle_range : tuple, optional
+ This is the cycle number range (min, max) to calculate the
+ merger tree. If both zrange and cycle_number given,
+ ignore zrange.
+ output : bool, optional
+ If provided, both .cpkl and .txt files containing the parentage
+ relationships will be output.
+ load_saved : bool, optional
+ Flag to load previously saved parental relationships
+ save_filename : str, optional
+ Filename to save parental relationships
+ external_FOF : bool, optional
+ Are we building a tree from outputs generated by an
+ external FOF program, or an FOF internal to yt?
+ FOF_directory : str, optional
+ Directory where FOF files are located, note that the files
+ must be named according to the syntax: groups_DDDDD.txt for
+ internal yt outputs, and groups_DDDDD.dat for external FOF outputs.
+ where DDDDD are digits representing the equivalent cycle number.
+ e.g. groups_00000.txt
+
+ Examples
+ --------
+ mt = EnzoFOFMergerTree() # by default it grabs every DD in FOF dir
+ mt.build_tree(0) # Create tree for halo 0
+ mt.print_tree()
+ mt.write_dot()
+
+ See Also
+ --------
+ plot_halo_evolution()
+ """
+ def __init__(self, zrange=None, cycle_range=None, output=False,
+ load_saved=False, save_filename="merger_tree.cpkl",
+ external_FOF=True, FOF_directory="FOF"):
+
+ self.relationships = {}
+ self.redshifts = {}
+ self.external_FOF = external_FOF
+ self.FOF_directory = FOF_directory
+ if load_saved:
+ self.load_tree("%s/%s" % (self.FOF_directory, save_filename))
+ # make merger tree work within specified cycle/z limits
+ # on preloaded halos
+ if zrange is not None:
+ self.select_redshifts(zrange)
+ if cycle_range is not None:
+ self.select_cycles(cycle_range)
+ else:
+ self.find_outputs(zrange, cycle_range, output)
+ self.run_merger_tree(output)
+ self.save_tree("%s/%s" % (self.FOF_directory, save_filename))
+
+ def select_cycles(self, cycle_range):
+ """
+ Takes an existing tree and pares it to only include a subset of
+ cycles. Useful in paring a loaded tree.
+ """
+ # N.B. Does not delete info from self.relationships to save space
+ # just removes it from redshift dict for indexing
+ for cycle in self.redshifts.keys():
+ if cycle <= cycle_range[0] and cycle >= cycle_range[1]:
+ del self.redshifts[cycle]
+
+ def select_redshifts(self, zrange):
+ """
+ Takes an existing tree and pares it to only include a subset of
+ redshifts. Useful in paring a loaded tree.
+ """
+ # N.B. Does not delete info from self.relationships to save space
+ # just removes it from redshift dict for indexing
+ for redshift in self.redshifts.values():
+ if redshift <= zrange[0] and redshift >= zrange[1]:
+ # some reverse lookup magic--assumes unique cycle/z pairs
+ cycle = [key for key,value in mt.redshifts.items() \
+ if value == redshift][0]
+ del self.redshifts[cycle]
+
+ def save_tree(self, filename):
+ cPickle.dump((self.redshifts, self.relationships),
+ open(filename, "wb"))
+
+ def load_tree(self, filename):
+ self.redshifts, self.relationships = \
+ cPickle.load(open(filename, "rb"))
+
+ def clear_data(self):
+ r"""Deletes previous merger tree, but keeps parentage
+ relationships.
+ """
+ del self.levels
+
+ def find_outputs(self, zrange, cycle_range, output):
+ self.numbers = []
+ if self.external_FOF:
+ filenames = "%s/groups_*.dat" % (self.FOF_directory)
+ files = glob.glob(filenames)
+ else:
+ filenames = "%s/groups_*.txt" % (self.FOF_directory)
+ files = glob.glob(filenames)
+ # If using redshift range, load redshifts only
+ for f in files:
+ num = int(f[-9:-4])
+ if zrange is not None:
+ HC = HaloCatalog(num, external_FOF=self.external_FOF, \
+ FOF_directory=self.FOF_directory)
+ # Allow for some epsilon
+ diff1 = (HC.redshift - zrange[0]) / zrange[0]
+ diff2 = (HC.redshift - zrange[1]) / zrange[1]
+ if diff1 >= -1e-3 and diff2 <= 1e-3:
+ self.numbers.append(num)
+ del HC
+ elif cycle_range is not None:
+ if num >= cycle_range[0] and num <= cycle_range[1]:
+ self.numbers.append(num)
+ else:
+ self.numbers.append(num)
+ self.numbers.sort()
+
+ def run_merger_tree(self, output):
+ # Run merger tree for all outputs, starting with the last output
+ for i in range(len(self.numbers)-1, 0, -1):
+ if output:
+ output = "%s/tree-%5.5d-%5.5d" % \
+ (self.FOF_directory, self.numbers[i], self.numbers[i-1])
+ else:
+ output = None
+ z0, z1, fr = find_halo_relationships(self.numbers[i], \
+ self.numbers[i-1], \
+ output_basename=output, \
+ external_FOF=self.external_FOF,
+ FOF_directory=self.FOF_directory)
+ self.relationships[self.numbers[i]] = fr
+ self.redshifts[self.numbers[i]] = z0
+ # Fill in last redshift
+ self.redshifts[self.numbers[0]] = z1
+
+ def build_tree(self, halonum, min_particles=0, max_children=1e20):
+ r"""Builds a merger tree, starting at the last output.
+
+ Parameters
+ ----------
+ halonum : int
+ Halo number in the last output to analyze.
+ min_particles : int, optional
+ Minimum number of particles of halos in tree.
+ max_children : int, optional
+ Maximum number of child halos each leaf can have.
+ """
+ self.halonum = halonum
+ self.max_children = max_children
+ self.output_numbers = sorted(self.relationships, reverse=True)
+ self.levels = {}
+ trunk = self.output_numbers[0]
+ self.levels[trunk] = [EnzoFOFMergerBranch(self, trunk, halonum,
+ max_children)]
+ self.generate_tree(min_particles, max_children)
+
+ def filter_small_halos(self, lvl, min_particles):
+ # Filter out children with less than min_particles
+ for h in self.levels[lvl]:
+ fil = []
+ for c in h.children:
+ if c[2] > min_particles: # c[2] = npart
+ fil.append(c)
+ h.children = fil
+
+ def generate_tree(self, min_particles, max_children):
+ self.filter_small_halos(self.output_numbers[0], min_particles)
+ for i in range(1,len(self.output_numbers)):
+ prev = self.output_numbers[i-1]
+ this = self.output_numbers[i]
+ self.levels[this] = []
+ this_halos = [] # To check for duplicates
+ for h in self.levels[prev]:
+ for c in h.children:
+ if c[0] in this_halos: continue
+ if self.relationships[this] == {}: continue
+ branch = EnzoFOFMergerBranch(self, this, c[0],
+ max_children)
+ self.levels[this].append(branch)
+ this_halos.append(c[0])
+ self.filter_small_halos(this, min_particles)
+
+ def get_massive_progenitors(self, halonum, min_relation=0.25):
+ r"""Returns a list of the most massive progenitor halos.
+
+ This routine walks down the tree, following the most massive
+ progenitor on each node.
+
+ Parameters
+ ----------
+ halonum : int
+ Halo number at the last output to trace.
+
+ Returns
+ -------
+ output : dict
+ Dictionary of redshifts, cycle numbers, and halo numbers
+ of the most massive progenitor. keys = {redshift, cycle,
+ halonum}
+ """
+ output = {"redshift": [], "cycle": [], "halonum": []}
+ # First (lowest redshift) node in tree
+ halo0 = halonum
+ for cycle in sorted(self.numbers, reverse=True):
+ if cycle not in self.relationships: break
+ if halo0 not in self.relationships[cycle]: break
+ node = self.relationships[cycle][halo0]
+ output["redshift"].append(self.redshifts[cycle])
+ output["cycle"].append(cycle)
+ output["halonum"].append(halo0)
+ # Find progenitor
+ max_rel = 0.0
+ for k,v in node.items():
+ if not str(k).isdigit(): continue
+ if v[1] > max_rel and v[1] > min_relation:
+ halo0 = k
+ max_rel = v[1]
+ return output
+
+ def print_tree(self):
+ r"""Prints the merger tree to stdout.
+ """
+ for lvl in sorted(self.levels, reverse=True):
+ if lvl not in self.redshifts: continue
+ print "========== Cycle %5.5d (z=%f) ==========" % \
+ (lvl, self.redshifts[lvl])
+ for br in self.levels[lvl]:
+ print "Parent halo = %d" % br.halo_id
+ print "--> Most massive progenitor == Halo %d" % \
+ (br.progenitor)
+ for i,c in enumerate(br.children):
+ if i > self.max_children: break
+ print "--> Halo %8.8d :: fraction = %g" % (c[0], c[1])
+
+ def save_halo_evolution(self, filename):
+ """
+ Saves as an HDF5 file the relevant details about a halo
+ over the course of its evolution following the most massive
+ progenitor to have given it the bulk of its particles.
+ It stores info from the FOF_groups file: location, mass, id, etc.
+ """
+ f = h5py.File("%s/%s" % (self.FOF_directory, filename), 'a')
+ cycle_fin = self.redshifts.keys()[-1]
+ halo_id = self.levels[cycle_fin][0].halo_id
+ halo = "halo%05d" % halo_id
+ if halo in f:
+ del f["halo%05d" % halo_id]
+ g = f.create_group("halo%05d" % halo_id)
+ size = len(self.redshifts)
+ cycle = np.zeros(size)
+ redshift = np.zeros(size)
+ halo_id = np.zeros(size)
+ fraction = np.zeros(size)
+ mass = np.zeros(size)
+ densest_point = np.zeros((3,size))
+ COM = np.zeros((6,size))
+ fraction[0] = 1.
+
+ for i, lvl in enumerate(sorted(self.levels, reverse=True)):
+ if len(self.levels[lvl]) == 0: # lineage for this halo ends
+ cycle = cycle[:i] # so truncate arrays, and break
+ redshift = redshift[:i] # Not big enough.
+ halo_id = halo_id[:i]
+ fraction = fraction[:i]
+ mass = mass[:i]
+ densest_point = densest_point[:,:i]
+ COM = COM[:,:i]
+ break
+ if lvl not in self.redshifts: continue
+ mylog.info("========== Cycle %5.5d (z=%f) ==========" % \
+ (lvl, self.redshifts[lvl]))
+ cycle[i] = lvl
+ redshift[i] = self.redshifts[lvl]
+
+ br = self.levels[lvl][0]
+ mylog.info("Parent halo = %d" % br.halo_id)
+ mylog.info("-> Most massive progenitor == Halo %d" % (br.progenitor))
+ halo_id[i] = br.halo_id
+
+ if len(br.children) == 0: # lineage for this halo ends
+ cycle = cycle[:i+1] # (no children)
+ redshift = redshift[:i+1] # so truncate arrays, and break
+ halo_id = halo_id[:i+1]
+ fraction = fraction[:i+1]
+ mass = mass[:i+1]
+ densest_point = densest_point[:,:i+1]
+ COM = COM[:,:i+1]
+ break
+
+ if i < size-1:
+ fraction[i+1] = br.children[0][1]
+
+ # open up FOF file to parse for details
+ filename = "%s/groups_%05d.txt" % (self.FOF_directory, lvl)
+ mass[i], densest_point[:,i], COM[:,i] = \
+ grab_FOF_halo_info_internal(filename, br.halo_id)
+
+ # save the arrays in the hdf5 file
+ g.create_dataset("cycle", data=cycle)
+ g.create_dataset("redshift", data=redshift)
+ g.create_dataset("halo_id", data=halo_id)
+ g.create_dataset("fraction", data=fraction)
+ g.create_dataset("mass", data=mass)
+ g.create_dataset("densest_point", data=densest_point)
+ g.create_dataset("COM", data=COM)
+ f.close()
+
+ def write_dot(self, filename=None):
+ r"""Writes merger tree to a GraphViz or image file.
+
+ Parameters
+ ----------
+ filename : str, optional
+ Filename to write the GraphViz file. Default will be
+ tree_halo%05i.gv, which is a text file in the GraphViz format.
+ If filename is an image (e.g. "MergerTree.png") the output will
+ be in the appropriate image format made by calling GraphViz
+ automatically. See GraphViz (e.g. "dot -v")
+ for a list of available output formats.
+ """
+ if filename == None:
+ filename = "%s/tree_halo%5.5d.gv" % \
+ (self.FOF_directory, self.halonum)
+ # Create the pydot graph object.
+ self.graph = pydot.Dot('galaxy', graph_type='digraph')
+ self.halo_shape = "rect"
+ self.z_shape = "plaintext"
+ # Subgraphs to align levels
+ self.subgs = {}
+ for num in self.numbers:
+ self.subgs[num] = pydot.Subgraph('', rank = 'same')
+ self.graph.add_subgraph(self.subgs[num])
+ sorted_lvl = sorted(self.levels, reverse=True)
+ for ii,lvl in enumerate(sorted_lvl):
+ # Since we get the cycle number from the key, it won't
+ # exist for the last level, i.e. children of last level.
+ # Get it from self.numbers.
+ if ii < len(sorted_lvl)-1:
+ next_lvl = sorted_lvl[ii+1]
+ else:
+ next_lvl = self.numbers[0]
+ for br in self.levels[lvl]:
+ for c in br.children:
+ color = "red" if c[0] == br.progenitor else "black"
+ self.graph.add_edge(pydot.Edge("C%d_H%d" %(lvl, br.halo_id),
+ "C%d_H%d" % (next_lvl, c[0]), color=color))
+ #line = " C%d_H%d -> C%d_H%d [color=%s];\n" % \
+ # (lvl, br.halo_id, next_lvl, c[0], color)
+
+ #fp.write(line)
+ last_level = (ii,lvl)
+ for ii,lvl in enumerate(sorted_lvl):
+ npart_max = 0
+ for br in self.levels[lvl]:
+ if br.npart > npart_max: npart_max = br.npart
+ for br in self.levels[lvl]:
+ halo_str = "C%d_H%d" % (lvl, br.halo_id)
+ style = "filled" if br.npart == npart_max else "solid"
+ self.graph.add_node(pydot.Node(halo_str,
+ label = "Halo %d\\n%d particles" % (br.halo_id, br.npart),
+ style = style, shape = self.halo_shape))
+ # Add this node to the correct level subgraph.
+ self.subgs[lvl].add_node(pydot.Node(halo_str))
+ for lvl in self.numbers:
+ # Don't add the z if there are no halos already in the subgraph.
+ if len(self.subgs[lvl].get_node_list()) == 0: continue
+ self.subgs[lvl].add_node(pydot.Node("%1.5e" % self.redshifts[lvl],
+ shape = self.z_shape, label = "z=%0.3f" % self.redshifts[lvl]))
+ # Based on the suffix of the file name, write out the result to a file.
+ suffix = filename.split(".")[-1]
+ if suffix == "gv": suffix = "raw"
+ mylog.info("Writing %s format %s to disk." % (suffix, filename))
+ self.graph.write("%s" % filename, format=suffix)
+
+def find_halo_relationships(output1_id, output2_id, output_basename = None,
+ radius = 0.10, external_FOF=True,
+ FOF_directory='FOF'):
+ r"""Calculate the parentage and child relationships between two EnzoFOF
+ halo catalogs.
+
+ This function performs a very simple merger tree calculation between two
+ sets of halos. For every halo in the second halo catalog, it looks to the
+ first halo catalog to find the parents by looking at particle IDs. The
+ particle IDs from the child halos are identified in potential parents, and
+ then both percent-of-parent and percent-to-child values are recorded.
+
+ Note that this works with catalogs constructed by Enzo's FOF halo
+ when used in external_FOF=True mode, whereas it will work with
+ catalogs constructed by yt using external_FOF=False mode.
+
+ Parameters
+ ----------
+ output1_id : int
+ This is the integer output id of the (first) halo catalog to parse and
+ load.
+ output2_id : int
+ This is the integer output id of the (second) halo catalog to parse and
+ load.
+ output_basename : string
+ If provided, both .cpkl and .txt files containing the parentage
+ relationships will be output.
+ radius : float, default to 0.10
+ In absolute units, the radius to examine when guessing possible
+ parent/child relationships. If this value is too small, you will miss
+ possible relationships.
+ FOF_directory : str, optional
+ Directory where FOF files are located
+
+ Returns
+ -------
+ pfrac : dict
+ This is a dict of dicts. The first key is the parent halo id, the
+ second is the child halo id. The values are the percent contributed
+ from parent to child and the percent of a child that came from the
+ parent.
+ """
+ mylog.info("Parsing Halo Catalog %04i", output1_id)
+ HC1 = HaloCatalog(output1_id, False, external_FOF=external_FOF, \
+ FOF_directory=FOF_directory)
+ mylog.info("Parsing Halo Catalog %04i", output2_id)
+ HC2 = HaloCatalog(output2_id, True, external_FOF=external_FOF, \
+ FOF_directory=FOF_directory)
+ mylog.info("Calculating fractions")
+ pfrac = HC1.calculate_parentage_fractions(HC2)
+
+ if output_basename is not None and pfrac != {}:
+ f = open("%s.txt" % (output_basename), "w")
+ for hid1 in sorted(pfrac):
+ for hid2 in sorted(pfrac[hid1]):
+ if not str(hid2).isdigit(): continue
+ p1, p2, npart = pfrac[hid1][hid2]
+ if p1 == 0.0: continue
+ f.write( "Halo %s (%s) contributed %0.3e of its particles to %s (%s), which makes up %0.3e of that halo\n" % (
+ hid1, output1_id, p2, hid2, output2_id, p1))
+ f.close()
+
+ cPickle.dump(pfrac, open("%s.cpkl" % (output_basename), "wb"))
+
+ return HC1.redshift, HC2.redshift, pfrac
+
+def grab_FOF_halo_info_internal(filename, halo_id):
+ """
+ Finds a specific halo's entry in the FOF group output file and passes
+ the relevant parameters (mass, densest point, and COM) back to the caller.
+ """
+ # open up FOF file to parse for details
+ groups_file = open(filename, 'r')
+ for line in groups_file:
+ if line.startswith("#"): continue
+ if int(line.split()[0]) == halo_id:
+ ar = np.array(line.split()).astype('float64')
+ return ar[1], ar[4:7], ar[7:13] # mass, xyz_dens, xyzvxvyvz_COM
+
+def plot_halo_evolution(filename, halo_id, x_quantity='cycle', y_quantity='mass',
+ x_log=False, y_log=True, FOF_directory='FOF'):
+ """
+ Once you have generated a file using the
+ EnzoFOFMergerTree.save_halo_evolution function, this is a simple way of
+ plotting the evolution in the quantities of that halo over its lifetime.
+
+ Parameters
+ ----------
+ filename : str
+ The filename to which you saved the hdf5 data from save_halo_evolution
+ halo_id : int
+ The halo in 'filename' that you want to follow
+ x_quantity, y_quantity : str, optional
+ The quantities to plot on the x and y axes, respectively.
+ Valid options are:
+
+ * cycle
+ * mass
+ * fraction
+ * halo_id
+ * redshift
+ * dense_x
+ * dense_y
+ * dense_z
+ * COM_x
+ * COM_y
+ * COM_z
+ * COM_vx
+ * COM_vy
+ * COM_vz
+
+ x_log, y_log : bool, optional
+ Whether to use a logarithmic (True) or linear (False) scale on the x (y) axis.
+ FOF_directory : str, optional
+ Directory where FOF files (and hdf file) are located
+
+ Examples
+ --------
+
+ >>> # generates mass history plots for the 20 most massive halos at t_fin.
+ >>> ts = DatasetSeries.from_filenames("DD????/DD????")
+ >>> # long step--must run FOF on each DD, but saves outputs for later use
+ >>> for ds in ts:
+ ... halo_list = FOFHaloFinder(ds)
+ ... i = int(ds.basename[2:])
+ ... halo_list.write_out("FOF/groups_%05i.txt" % i)
+ ... halo_list.write_particle_lists("FOF/particles_%05i" % i)
+ ...
+ >>> mt = EnzoFOFMergerTree(external_FOF=False)
+ >>> for i in range(20):
+ ... mt.build_tree(i)
+ ... mt.save_halo_evolution('halos.h5')
+ ...
+ >>> for i in range(20):
+ ... plot_halo_evolution('halos.h5', i)
+ """
+ import matplotlib.pyplot as plt
+ f = h5py.File("%s/%s" % (FOF_directory, filename), 'r')
+ basename = os.path.splitext(filename)[0]
+ halo = "halo%05d" % halo_id
+ basename = basename + "_" + halo
+ g = f[halo]
+ values = list(g)
+ index_dict = {'x' : 0, 'y' : 1, 'z' : 2, 'vx' : 3, 'vy' : 4, 'vz' : 5}
+ coords = {}
+ fields = {}
+ for i, quantity in enumerate((x_quantity, y_quantity)):
+ field = quantity
+ if quantity.startswith('COM'):
+ index = index_dict[quantity.split('_')[-1]]
+ quantity = ('COM')
+ if quantity.startswith('dense'):
+ index = index_dict[quantity.split('_')[-1]]
+ quantity = ('densest_point')
+ if quantity not in values:
+ exit('%s not in list of values in %s for halo %d' % \
+ (quantity, filename, halo_id))
+ if not field == quantity:
+ coords[i] = g[quantity][index,:]
+ else:
+ coords[i] = g[quantity]
+ if len(coords[i]) == 1:
+ # ("Only 1 value for Halo %d. Ignoring." % halo_id)
+ return
+ fields[i] = field
+
+ ax = plt.axes()
+ ax.plot(coords[0], coords[1])
+ ax.set_title(basename)
+ ax.set_xlabel(fields[0])
+ ax.set_ylabel(fields[1])
+ if x_log:
+ ax.set_xscale("log")
+ if y_log:
+ ax.set_yscale("log")
+ ofn = "%s/%s_%s_%s.png" % (FOF_directory, basename, fields[0], fields[1])
+ plt.savefig(ofn)
+ plt.clf()
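The parentage bookkeeping in this module boils down to an intersection of particle ID arrays. A minimal standalone sketch (with hypothetical particle ID lists) of the two fractions that find_relative_parentage and find_halo_relationships record:

    import numpy as np

    # Hypothetical particle IDs for a candidate parent halo (earlier output)
    # and a child halo (later output).
    parent_ids = np.array([1, 2, 3, 4, 5, 6, 7, 8])
    child_ids = np.array([4, 5, 6, 7, 8, 9, 10])

    # Number of particles common to both halos.
    overlap = np.intersect1d(parent_ids, child_ids).size

    of_child_from_parent = float(overlap) / child_ids.size   # fraction of the child inherited from the parent (5/7)
    of_parent_to_child = float(overlap) / parent_ids.size    # fraction of the parent passed on to the child (5/8)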
diff -r 035187f2fd3243b0be04fcc7e6e9997bc9226d4a -r cf5e6f52f838ac2ef6d312cbfe546bc975b6112a yt/analysis_modules/halo_merger_tree/enzofof_merger_tree.py
--- a/yt/analysis_modules/halo_merger_tree/enzofof_merger_tree.py
+++ /dev/null
@@ -1,801 +0,0 @@
-"""
-A very simple, purely-serial, merger tree script that knows how to parse FOF
-catalogs, either output by Enzo or output by yt's FOF halo finder, and then
-compare parent/child relationships.
-
-
-
-"""
-
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, yt Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-# plot_halo_evolution() gives a good full example of how to use the framework
-
-# First pass at a simplified merger tree
-#
-# Basic outline:
-#
-# 1. Halo find inline, obtaining particle catalogs
-# 2. Load dataset at time t
-# 3. Load dataset at time t+1
-# 4. Parse catalogs for t and t+1
-# 5. Place halos for t+1 in kD-tree
-# 6. For every halo in t, execute ball-query with some linking length
-# 7. For every halo in ball-query result, execute numpy's intersect1d on
-# particle IDs
-# 8. Parentage is described by a fraction of particles that pass from one to
- # the other; we have both descendant fractions and ancestry fractions.
-
-
-import numpy as np
-import h5py
-import time
-import pdb
-import cPickle
-import glob
-
-from yt.funcs import *
-from yt.extern.pykdtree import KDTree
-import yt.extern.pydot as pydot
-
-# We don't currently use this, but we may again find a use for it in the
-# future.
-class MaxLengthDict(dict):
- def __init__(self, *args, **kwargs):
- dict.__init__(self, *args, **kwargs)
- self.order = [None] * 50
-
- def __setitem__(self, key, val):
- if key not in self.order:
- to_remove = self.order.pop(0)
- self.pop(to_remove, None)
- self.order.append(key)
- dict.__setitem__(self, key, val)
-
- def __getitem__(self, key):
- if key in self.order:
- self.order.pop(self.order.index(key))
- self.order.append(key)
- return dict.__getitem__(self, key)
-
- def __delitem__(self, key):
- dict.__delitem__(self, key)
- self.order.pop(self.order.index(key))
- self.order.insert(0, None)
-
-class HaloCatalog(object):
- r"""A catalog of halos, parsed from EnzoFOF outputs.
-
- This class will read in catalogs output by the Enzo FOF halo finder and
- make available their positions, radii, etc. Enzo FOF was provided
- starting with 2.0, and can be run either inline (with the correct
- options) or as a postprocessing step using the `-F` command line
- option. This class is mostly useful when calculating a merger tree,
- and when the particle IDs for members of a given halo are output as
- well.
-
- Parameters
- ----------
- output_id : int
- This is the integer output id of the halo catalog to parse and
- load.
- cache : bool
- Should we store, in between accesses, the particle IDs? If set to
- true, the correct particle files must exist.
- external_FOF : bool, optional
- Are we building a tree from outputs generated by an
- external FOF program, or an FOF internal to yt?
- FOF_directory : str, optional
- Directory where FOF files are located
- """
- cache = None
- def __init__(self, output_id, cache = True, external_FOF=True, FOF_directory="FOF"):
- self.output_id = output_id
- self.external_FOF = external_FOF
- self.redshift = 0.0
- self.FOF_directory = FOF_directory
- self.particle_file = h5py.File("%s/particles_%05i.h5" % \
- (FOF_directory, output_id), "r")
- if self.external_FOF:
- self.parse_halo_catalog_external()
- else:
- self.parse_halo_catalog_internal()
- if cache: self.cache = dict()#MaxLengthDict()
-
- def __del__(self):
- self.particle_file.close()
-
- def parse_halo_catalog_external(self):
- hp = []
- for line in open("%s/groups_%05i.dat" % \
- (self.FOF_directory, self.output_id)):
- if line.strip() == "": continue # empty
- if line.startswith("# Red"):
- self.redshift = float(line.split("=")[1])
- if line[0] == "#": continue # comment
- if line[0] == "d": continue # datavar
- x,y,z = [float(f) for f in line.split(None, 3)[:-1]]
- hp.append([x,y,z])
- if hp != []:
- self.halo_positions = np.array(hp)
- self.halo_kdtree = KDTree(self.halo_positions)
- else:
- self.halo_positions = None
- self.halo_kdtree = None
- return hp
-
- def parse_halo_catalog_internal(self):
- """
- This parser works on the files output directly out of yt's internal
- halo_finder. The parse_halo_catalog_external works with an
- external version of FOF.
-
- Examples
- --------
- ds = load("DD0000/DD0000")
- halo_list = FOFHaloFinder(ds)
- halo_list.write_out("FOF/groups_00000.txt")
- halos_COM = parse_halo_catalog_internal()
- """
- hp = []
- for line in open("%s/groups_%05i.txt" % \
- (self.FOF_directory, self.output_id)):
- if line.startswith("# RED"):
- self.redshift = float(line.split("=")[1])
- continue
- if line.strip() == "": continue # empty
- if line[0] == "#": continue # comment
- x,y,z = [float(f) for f in line.split()[7:10]] # COM x,y,z
- hp.append([x,y,z])
- if hp != []:
- self.halo_positions = np.array(hp)
- self.halo_kdtree = KDTree(self.halo_positions)
- else:
- self.halo_positions = None
- self.halo_kdtree = None
- return hp
-
- def read_particle_ids(self, halo_id):
- if self.cache is not None:
- if halo_id not in self.cache:
- if self.external_FOF:
- self.cache[halo_id] = \
- self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
- else:
- self.cache[halo_id] = \
- self.particle_file["/Halo%08i/particle_index" % halo_id][:]
- ids = self.cache[halo_id]
- else:
- if self.external_FOF:
- ids = self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
- else:
- ids = self.particle_file["/Halo%08i/particle_index" % halo_id][:]
- return HaloParticleList(halo_id, self.halo_positions[halo_id,:], ids)
-
- def calculate_parentage_fractions(self, other_catalog, radius = 0.10):
- parentage_fractions = {}
- if self.halo_positions == None or other_catalog.halo_positions == None:
- return parentage_fractions
- mylog.debug("Ball-tree query with radius %0.3e", radius)
- all_nearest = self.halo_kdtree.query_ball_tree(
- other_catalog.halo_kdtree, radius)
- pbar = get_pbar("Halo Mergers", self.halo_positions.shape[0])
- for hid1, nearest in enumerate(all_nearest):
- pbar.update(hid1)
- parentage_fractions[hid1] = {}
- HPL1 = self.read_particle_ids(hid1)
- for hid2 in sorted(nearest):
- HPL2 = other_catalog.read_particle_ids(hid2)
- p1, p2 = HPL1.find_relative_parentage(HPL2)
- parentage_fractions[hid1][hid2] = (p1, p2, HPL2.number_of_particles)
- parentage_fractions[hid1]["NumberOfParticles"] = HPL1.number_of_particles
- pbar.finish()
- return parentage_fractions
-
-class HaloParticleList(object):
- def __init__(self, halo_id, position, particle_ids):
- self.halo_id = halo_id
- self.position = np.array(position)
- self.particle_ids = particle_ids
- self.number_of_particles = particle_ids.size
-
- def find_nearest(self, other_tree, radius = 0.10):
- return other_tree.query_ball_point(self.position, radius)
-
- def find_relative_parentage(self, child):
- # Return two values: percent this halo gave to the other, and percent
- # of the other that comes from this halo
- overlap = np.intersect1d(self.particle_ids, child.particle_ids).size
- of_child_from_me = float(overlap)/child.particle_ids.size
- of_mine_from_me = float(overlap)/self.particle_ids.size
- return of_child_from_me, of_mine_from_me
-
-class EnzoFOFMergerBranch(object):
- def __init__(self, tree, output_num, halo_id, max_children,
- min_relation=0.25):
- self.output_num = output_num
- self.halo_id = halo_id
- self.npart = tree.relationships[output_num][halo_id]["NumberOfParticles"]
- self.children = []
- self.progenitor = -1
- max_relationship = 0.0
- halo_count = 0
- sorted_keys = sorted(tree.relationships[output_num][halo_id].keys())
- for k in sorted_keys:
- if not str(k).isdigit(): continue
- v = tree.relationships[output_num][halo_id][k]
- if v[1] > min_relation and halo_count < max_children:
- halo_count += 1
- self.children.append((k,v[1],v[2]))
- if v[1] > max_relationship:
- self.progenitor = k
- max_relationship = v[1]
-
-class EnzoFOFMergerTree(object):
- r"""Calculates the parentage relationships for halos for a series of
- outputs, using the framework provided in enzofof_merger_tree.
-
- Parameters
- ----------
- zrange : tuple
- This is the redshift range (min, max) to calculate the
- merger tree. E.g. (0, 2) for z=2 to z=0
- cycle_range : tuple, optional
- This is the cycle number range (min, max) to calculate the
- merger tree. If both zrange and cycle_range are given,
- zrange is ignored.
- output : bool, optional
- If True, both .cpkl and .txt files containing the parentage
- relationships will be written.
- load_saved : bool, optional
- Flag to load previously saved parental relationships
- save_filename : str, optional
- Filename to save parental relationships
- external_FOF : bool, optional
- Are we building a tree from outputs generated by an
- external FOF program, or an FOF internal to yt?
- FOF_directory : str, optional
- Directory where FOF files are located, note that the files
- must be named according to the syntax: groups_DDDDD.txt for
- internal yt outputs, and groups_DDDDD.dat for external FOF outputs.
- where DDDDD are digits representing the equivalent cycle number.
- e.g. groups_00000.txt
-
- Examples
- --------
- mt = EnzoFOFMergerTree() # by default it grabs every DD in FOF dir
- mt.build_tree(0) # Create tree for halo 0
- mt.print_tree()
- mt.write_dot()
-
- See Also
- --------
- plot_halo_evolution()
- """
- def __init__(self, zrange=None, cycle_range=None, output=False,
- load_saved=False, save_filename="merger_tree.cpkl",
- external_FOF=True, FOF_directory="FOF"):
-
- self.relationships = {}
- self.redshifts = {}
- self.external_FOF = external_FOF
- self.FOF_directory = FOF_directory
- if load_saved:
- self.load_tree("%s/%s" % (self.FOF_directory, save_filename))
- # make merger tree work within specified cycle/z limits
- # on preloaded halos
- if zrange is not None:
- self.select_redshifts(zrange)
- if cycle_range is not None:
- self.select_cycles(cycle_range)
- else:
- self.find_outputs(zrange, cycle_range, output)
- self.run_merger_tree(output)
- self.save_tree("%s/%s" % (self.FOF_directory, save_filename))
-
- def select_cycles(self, cycle_range):
- """
- Takes an existing tree and pares it to only include a subset of
- cycles. Useful in paring a loaded tree.
- """
- # N.B. Does not delete info from self.relationships to save space
- # just removes it from redshift dict for indexing
- for cycle in self.redshifts.keys():
- if cycle <= cycle_range[0] and cycle >= cycle_range[1]:
- del self.redshifts[cycle]
-
- def select_redshifts(self, zrange):
- """
- Takes an existing tree and pares it to only include a subset of
- redshifts. Useful in paring a loaded tree.
- """
- # N.B. Does not delete info from self.relationships to save space
- # just removes it from redshift dict for indexing
- for redshift in self.redshifts.values():
- if redshift <= zrange[0] and redshift >= zrange[1]:
- # some reverse lookup magic--assumes unique cycle/z pairs
- cycle = [key for key,value in mt.redshifts.items() \
- if value == redshift][0]
- del self.redshifts[cycle]
-
- def save_tree(self, filename):
- cPickle.dump((self.redshifts, self.relationships),
- open(filename, "wb"))
-
- def load_tree(self, filename):
- self.redshifts, self.relationships = \
- cPickle.load(open(filename, "rb"))
-
- def clear_data(self):
- r"""Deletes previous merger tree, but keeps parentage
- relationships.
- """
- del self.levels
-
- def find_outputs(self, zrange, cycle_range, output):
- self.numbers = []
- if self.external_FOF:
- filenames = "%s/groups_*.dat" % (self.FOF_directory)
- files = glob.glob(filenames)
- else:
- filenames = "%s/groups_*.txt" % (self.FOF_directory)
- files = glob.glob(filenames)
- # If using redshift range, load redshifts only
- for f in files:
- num = int(f[-9:-4])
- if zrange is not None:
- HC = HaloCatalog(num, external_FOF=self.external_FOF, \
- FOF_directory=self.FOF_directory)
- # Allow for some epsilon
- diff1 = (HC.redshift - zrange[0]) / zrange[0]
- diff2 = (HC.redshift - zrange[1]) / zrange[1]
- if diff1 >= -1e-3 and diff2 <= 1e-3:
- self.numbers.append(num)
- del HC
- elif cycle_range is not None:
- if num >= cycle_range[0] and num <= cycle_range[1]:
- self.numbers.append(num)
- else:
- self.numbers.append(num)
- self.numbers.sort()
-
- def run_merger_tree(self, output):
- # Run merger tree for all outputs, starting with the last output
- for i in range(len(self.numbers)-1, 0, -1):
- if output:
- output = "%s/tree-%5.5d-%5.5d" % \
- (self.FOF_directory, self.numbers[i], self.numbers[i-1])
- else:
- output = None
- z0, z1, fr = find_halo_relationships(self.numbers[i], \
- self.numbers[i-1], \
- output_basename=output, \
- external_FOF=self.external_FOF,
- FOF_directory=self.FOF_directory)
- self.relationships[self.numbers[i]] = fr
- self.redshifts[self.numbers[i]] = z0
- # Fill in last redshift
- self.redshifts[self.numbers[0]] = z1
-
- def build_tree(self, halonum, min_particles=0, max_children=1e20):
- r"""Builds a merger tree, starting at the last output.
-
- Parameters
- ----------
- halonum : int
- Halo number in the last output to analyze.
- min_particles : int, optional
- Minimum number of particles of halos in tree.
- max_children : int, optional
- Maximum number of child halos each leaf can have.
- """
- self.halonum = halonum
- self.max_children = max_children
- self.output_numbers = sorted(self.relationships, reverse=True)
- self.levels = {}
- trunk = self.output_numbers[0]
- self.levels[trunk] = [EnzoFOFMergerBranch(self, trunk, halonum,
- max_children)]
- self.generate_tree(min_particles, max_children)
-
- def filter_small_halos(self, lvl, min_particles):
- # Filter out children with less than min_particles
- for h in self.levels[lvl]:
- fil = []
- for c in h.children:
- if c[2] > min_particles: # c[2] = npart
- fil.append(c)
- h.children = fil
-
- def generate_tree(self, min_particles, max_children):
- self.filter_small_halos(self.output_numbers[0], min_particles)
- for i in range(1,len(self.output_numbers)):
- prev = self.output_numbers[i-1]
- this = self.output_numbers[i]
- self.levels[this] = []
- this_halos = [] # To check for duplicates
- for h in self.levels[prev]:
- for c in h.children:
- if c[0] in this_halos: continue
- if self.relationships[this] == {}: continue
- branch = EnzoFOFMergerBranch(self, this, c[0],
- max_children)
- self.levels[this].append(branch)
- this_halos.append(c[0])
- self.filter_small_halos(this, min_particles)
-
- def get_massive_progenitors(self, halonum, min_relation=0.25):
- r"""Returns a list of the most massive progenitor halos.
-
- This routine walks down the tree, following the most massive
- progenitor on each node.
-
- Parameters
- ----------
- halonum : int
- Halo number at the last output to trace.
-
- Returns
- -------
- output : dict
- Dictionary of redshifts, cycle numbers, and halo numbers
- of the most massive progenitor. keys = {redshift, cycle,
- halonum}
- """
- output = {"redshift": [], "cycle": [], "halonum": []}
- # First (lowest redshift) node in tree
- halo0 = halonum
- for cycle in sorted(self.numbers, reverse=True):
- if cycle not in self.relationships: break
- if halo0 not in self.relationships[cycle]: break
- node = self.relationships[cycle][halo0]
- output["redshift"].append(self.redshifts[cycle])
- output["cycle"].append(cycle)
- output["halonum"].append(halo0)
- # Find progenitor
- max_rel = 0.0
- for k,v in node.items():
- if not str(k).isdigit(): continue
- if v[1] > max_rel and v[1] > min_relation:
- halo0 = k
- max_rel = v[1]
- return output
-
- def print_tree(self):
- r"""Prints the merger tree to stdout.
- """
- for lvl in sorted(self.levels, reverse=True):
- if lvl not in self.redshifts: continue
- print "========== Cycle %5.5d (z=%f) ==========" % \
- (lvl, self.redshifts[lvl])
- for br in self.levels[lvl]:
- print "Parent halo = %d" % br.halo_id
- print "--> Most massive progenitor == Halo %d" % \
- (br.progenitor)
- for i,c in enumerate(br.children):
- if i > self.max_children: break
- print "--> Halo %8.8d :: fraction = %g" % (c[0], c[1])
-
- def save_halo_evolution(self, filename):
- """
- Saves as an HDF5 file the relevant details about a halo
- over the course of its evolution following the most massive
- progenitor to have given it the bulk of its particles.
- It stores info from the FOF_groups file: location, mass, id, etc.
- """
- f = h5py.File("%s/%s" % (self.FOF_directory, filename), 'a')
- cycle_fin = self.redshifts.keys()[-1]
- halo_id = self.levels[cycle_fin][0].halo_id
- halo = "halo%05d" % halo_id
- if halo in f:
- del f["halo%05d" % halo_id]
- g = f.create_group("halo%05d" % halo_id)
- size = len(self.redshifts)
- cycle = np.zeros(size)
- redshift = np.zeros(size)
- halo_id = np.zeros(size)
- fraction = np.zeros(size)
- mass = np.zeros(size)
- densest_point = np.zeros((3,size))
- COM = np.zeros((6,size))
- fraction[0] = 1.
-
- for i, lvl in enumerate(sorted(self.levels, reverse=True)):
- if len(self.levels[lvl]) == 0: # lineage for this halo ends
- cycle = cycle[:i] # so truncate arrays, and break
- redshift = redshift[:i] # Not big enough.
- halo_id = halo_id[:i]
- fraction = fraction[:i]
- mass = mass[:i]
- densest_point = densest_point[:,:i]
- COM = COM[:,:i]
- break
- if lvl not in self.redshifts: continue
- mylog.info("========== Cycle %5.5d (z=%f) ==========" % \
- (lvl, self.redshifts[lvl]))
- cycle[i] = lvl
- redshift[i] = self.redshifts[lvl]
-
- br = self.levels[lvl][0]
- mylog.info("Parent halo = %d" % br.halo_id)
- mylog.info("-> Most massive progenitor == Halo %d" % (br.progenitor))
- halo_id[i] = br.halo_id
-
- if len(br.children) == 0: # lineage for this halo ends
- cycle = cycle[:i+1] # (no children)
- redshift = redshift[:i+1] # so truncate arrays, and break
- halo_id = halo_id[:i+1]
- fraction = fraction[:i+1]
- mass = mass[:i+1]
- densest_point = densest_point[:,:i+1]
- COM = COM[:,:i+1]
- break
-
- if i < size-1:
- fraction[i+1] = br.children[0][1]
-
- # open up FOF file to parse for details
- filename = "%s/groups_%05d.txt" % (self.FOF_directory, lvl)
- mass[i], densest_point[:,i], COM[:,i] = \
- grab_FOF_halo_info_internal(filename, br.halo_id)
-
- # save the arrays in the hdf5 file
- g.create_dataset("cycle", data=cycle)
- g.create_dataset("redshift", data=redshift)
- g.create_dataset("halo_id", data=halo_id)
- g.create_dataset("fraction", data=fraction)
- g.create_dataset("mass", data=mass)
- g.create_dataset("densest_point", data=densest_point)
- g.create_dataset("COM", data=COM)
- f.close()
-
- def write_dot(self, filename=None):
- r"""Writes merger tree to a GraphViz or image file.
-
- Parameters
- ----------
- filename : str, optional
- Filename to write the GraphViz file. Default will be
- tree_halo%05i.gv, which is a text file in the GraphViz format.
- If filename is an image (e.g. "MergerTree.png") the output will
- be in the appropriate image format made by calling GraphViz
- automatically. See GraphViz (e.g. "dot -v")
- for a list of available output formats.
- """
- if filename == None:
- filename = "%s/tree_halo%5.5d.gv" % \
- (self.FOF_directory, self.halonum)
- # Create the pydot graph object.
- self.graph = pydot.Dot('galaxy', graph_type='digraph')
- self.halo_shape = "rect"
- self.z_shape = "plaintext"
- # Subgraphs to align levels
- self.subgs = {}
- for num in self.numbers:
- self.subgs[num] = pydot.Subgraph('', rank = 'same')
- self.graph.add_subgraph(self.subgs[num])
- sorted_lvl = sorted(self.levels, reverse=True)
- for ii,lvl in enumerate(sorted_lvl):
- # Since we get the cycle number from the key, it won't
- # exist for the last level, i.e. children of last level.
- # Get it from self.numbers.
- if ii < len(sorted_lvl)-1:
- next_lvl = sorted_lvl[ii+1]
- else:
- next_lvl = self.numbers[0]
- for br in self.levels[lvl]:
- for c in br.children:
- color = "red" if c[0] == br.progenitor else "black"
- self.graph.add_edge(pydot.Edge("C%d_H%d" %(lvl, br.halo_id),
- "C%d_H%d" % (next_lvl, c[0]), color=color))
- #line = " C%d_H%d -> C%d_H%d [color=%s];\n" % \
- # (lvl, br.halo_id, next_lvl, c[0], color)
-
- #fp.write(line)
- last_level = (ii,lvl)
- for ii,lvl in enumerate(sorted_lvl):
- npart_max = 0
- for br in self.levels[lvl]:
- if br.npart > npart_max: npart_max = br.npart
- for br in self.levels[lvl]:
- halo_str = "C%d_H%d" % (lvl, br.halo_id)
- style = "filled" if br.npart == npart_max else "solid"
- self.graph.add_node(pydot.Node(halo_str,
- label = "Halo %d\\n%d particles" % (br.halo_id, br.npart),
- style = style, shape = self.halo_shape))
- # Add this node to the correct level subgraph.
- self.subgs[lvl].add_node(pydot.Node(halo_str))
- for lvl in self.numbers:
- # Don't add the z if there are no halos already in the subgraph.
- if len(self.subgs[lvl].get_node_list()) == 0: continue
- self.subgs[lvl].add_node(pydot.Node("%1.5e" % self.redshifts[lvl],
- shape = self.z_shape, label = "z=%0.3f" % self.redshifts[lvl]))
- # Based on the suffix of the file name, write out the result to a file.
- suffix = filename.split(".")[-1]
- if suffix == "gv": suffix = "raw"
- mylog.info("Writing %s format %s to disk." % (suffix, filename))
- self.graph.write("%s" % filename, format=suffix)
-
-def find_halo_relationships(output1_id, output2_id, output_basename = None,
- radius = 0.10, external_FOF=True,
- FOF_directory='FOF'):
- r"""Calculate the parentage and child relationships between two EnzoFOF
- halo catalogs.
-
- This function performs a very simple merger tree calculation between two
- sets of halos. For every halo in the second halo catalog, it looks to the
- first halo catalog to find the parents by looking at particle IDs. The
- particle IDs from the child halos are identified in potential parents, and
- then both percent-of-parent and percent-to-child values are recorded.
-
- Note that this works with catalogs constructed by Enzo's FOF halo
- finder when used in external_FOF=True mode, and with catalogs
- constructed by yt's FOF halo finder when using external_FOF=False mode.
-
- Parameters
- ----------
- output1_id : int
- This is the integer output id of the (first) halo catalog to parse and
- load.
- output2_id : int
- This is the integer output id of the (second) halo catalog to parse and
- load.
- output_basename : string
- If provided, both .cpkl and .txt files containing the parentage
- relationships will be output.
- radius : float, default to 0.10
- In absolute units, the radius to examine when guessing possible
- parent/child relationships. If this value is too small, you will miss
- possible relationships.
- FOF_directory : str, optional
- Directory where FOF files are located
-
- Returns
- -------
- pfrac : dict
- This is a dict of dicts. The first key is the parent halo id, the
- second is the child halo id. The values are the percent contributed
- from parent to child and the percent of a child that came from the
- parent.
- """
- mylog.info("Parsing Halo Catalog %04i", output1_id)
- HC1 = HaloCatalog(output1_id, False, external_FOF=external_FOF, \
- FOF_directory=FOF_directory)
- mylog.info("Parsing Halo Catalog %04i", output2_id)
- HC2 = HaloCatalog(output2_id, True, external_FOF=external_FOF, \
- FOF_directory=FOF_directory)
- mylog.info("Calculating fractions")
- pfrac = HC1.calculate_parentage_fractions(HC2)
-
- if output_basename is not None and pfrac != {}:
- f = open("%s.txt" % (output_basename), "w")
- for hid1 in sorted(pfrac):
- for hid2 in sorted(pfrac[hid1]):
- if not str(hid2).isdigit(): continue
- p1, p2, npart = pfrac[hid1][hid2]
- if p1 == 0.0: continue
- f.write( "Halo %s (%s) contributed %0.3e of its particles to %s (%s), which makes up %0.3e of that halo\n" % (
- hid1, output1_id, p2, hid2, output2_id, p1))
- f.close()
-
- cPickle.dump(pfrac, open("%s.cpkl" % (output_basename), "wb"))
-
- return HC1.redshift, HC2.redshift, pfrac
-
-def grab_FOF_halo_info_internal(filename, halo_id):
- """
- Finds a specific halo's information in the FOF group output information
- and pass relevant parameters to caller.
- """
- # open up FOF file to parse for details
- groups_file = open(filename, 'r')
- for line in groups_file:
- if line.startswith("#"): continue
- if int(line.split()[0]) == halo_id:
- ar = np.array(line.split()).astype('float64')
- return ar[1], ar[4:7], ar[7:13] # mass, xyz_dens, xyzvxvyvz_COM
-
-def plot_halo_evolution(filename, halo_id, x_quantity='cycle', y_quantity='mass',
- x_log=False, y_log=True, FOF_directory='FOF'):
- """
- Once you have generated a file using the
- EnzoFOFMergerTree.save_halo_evolution function, this is a simple way of
- plotting the evolution in the quantities of that halo over its lifetime.
-
- Parameters
- ----------
- filename : str
- The filename to which you saved the hdf5 data from save_halo_evolution
- halo_id : int
- The halo in 'filename' that you want to follow
- x_quantity, y_quantity : str, optional
- The quantity that you want to plot as the x_coord (or y_coords).
- Valid options are:
-
- * cycle
- * mass
- * fraction
- * halo_id
- * redshift
- * dense_x
- * dense_y
- * dense_z
- * COM_x
- * COM_y
- * COM_z
- * COM_vx
- * COM_vy
- * COM_vz
-
- x_log, y_log : bool, optional
- Do you want the x(y)-axis to be in log or linear?
- FOF_directory : str, optional
- Directory where FOF files (and hdf file) are located
-
- Examples
- --------
-
- >>> # generates mass history plots for the 20 most massive halos at t_fin.
- >>> ts = DatasetSeries.from_filenames("DD????/DD????")
- >>> # long step--must run FOF on each DD, but saves outputs for later use
- >>> for ds in ts:
- ... halo_list = FOFHaloFinder(ds)
- ... i = int(ds.basename[2:])
- ... halo_list.write_out("FOF/groups_%05i.txt" % i)
- ... halo_list.write_particle_lists("FOF/particles_%05i" % i)
- ...
- >>> mt = EnzoFOFMergerTree(external_FOF=False)
- >>> for i in range(20):
- ... mt.build_tree(i)
- ... mt.save_halo_evolution('halos.h5')
- ...
- >>> for i in range(20):
- ... plot_halo_evolution('halos.h5', i)
- """
- import matplotlib.pyplot as plt
- f = h5py.File("%s/%s" % (FOF_directory, filename), 'r')
- basename = os.path.splitext(filename)[0]
- halo = "halo%05d" % halo_id
- basename = basename + "_" + halo
- g = f[halo]
- values = list(g)
- index_dict = {'x' : 0, 'y' : 1, 'z' : 2, 'vx' : 3, 'vy' : 4, 'vz' : 5}
- coords = {}
- fields = {}
- for i, quantity in enumerate((x_quantity, y_quantity)):
- field = quantity
- if quantity.startswith('COM'):
- index = index_dict[quantity.split('_')[-1]]
- quantity = ('COM')
- if quantity.startswith('dense'):
- index = index_dict[quantity.split('_')[-1]]
- quantity = ('densest_point')
- if quantity not in values:
- exit('%s not in list of values in %s for halo %d' % \
- (quantity, filename, halo_id))
- if not field == quantity:
- coords[i] = g[quantity][index,:]
- else:
- coords[i] = g[quantity]
- if len(coords[i]) == 1:
- # ("Only 1 value for Halo %d. Ignoring." % halo_id)
- return
- fields[i] = field
-
- ax = plt.axes()
- ax.plot(coords[0], coords[1])
- ax.set_title(basename)
- ax.set_xlabel(fields[0])
- ax.set_ylabel(fields[1])
- if x_log:
- ax.set_xscale("log")
- if y_log:
- ax.set_yscale("log")
- ofn = "%s/%s_%s_%s.png" % (FOF_directory, basename, fields[0], fields[1])
- plt.savefig(ofn)
- plt.clf()
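Together with the addition above, this hunk amounts to moving the EnzoFOF merger tree module from yt.analysis_modules.halo_merger_tree to yt.analysis_modules.halo_analysis. A minimal sketch of the updated import path, assuming the module keeps its public names after the move:

    # Old location (removed here):
    # from yt.analysis_modules.halo_merger_tree.enzofof_merger_tree import EnzoFOFMergerTree
    # New location:
    from yt.analysis_modules.halo_analysis.enzofof_merger_tree import \
        EnzoFOFMergerTree, find_halo_relationships, plot_halo_evolution

    mt = EnzoFOFMergerTree(external_FOF=False)  # grabs every groups_*.txt in the FOF directory
    mt.build_tree(0)                            # build the tree for halo 0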
https://bitbucket.org/yt_analysis/yt/commits/a096fb6e0457/
Changeset: a096fb6e0457
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-28 20:33:23
Summary: Removing subpackage call.
Affected #: 1 file
diff -r cf5e6f52f838ac2ef6d312cbfe546bc975b6112a -r a096fb6e045715e6a309aa851459c021c65b10c6 yt/analysis_modules/setup.py
--- a/yt/analysis_modules/setup.py
+++ b/yt/analysis_modules/setup.py
@@ -8,7 +8,6 @@
config.add_subpackage("cosmological_observation")
config.add_subpackage("halo_finding")
config.add_subpackage("halo_mass_function")
- config.add_subpackage("halo_merger_tree")
config.add_subpackage("level_sets")
config.add_subpackage("particle_trajectories")
config.add_subpackage("photon_simulator")
https://bitbucket.org/yt_analysis/yt/commits/ab3741274267/
Changeset: ab3741274267
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-29 22:33:41
Summary: Fixing docstring.
Affected #: 1 file
diff -r a096fb6e045715e6a309aa851459c021c65b10c6 -r ab37412742679445ee6baf74c5faed157890c23e yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
--- a/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
+++ b/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
@@ -137,10 +137,10 @@
Examples
--------
- ds = load("DD0000/DD0000")
- halo_list = FOFHaloFinder(ds)
- halo_list.write_out("FOF/groups_00000.txt")
- halos_COM = parse_halo_catalog_internal()
+ >>> ds = load("DD0000/DD0000")
+ >>> halo_list = FOFHaloFinder(ds)
+ >>> halo_list.write_out("FOF/groups_00000.txt")
+ >>> halos_COM = parse_halo_catalog_internal()
"""
hp = []
for line in open("%s/groups_%05i.txt" % \
https://bitbucket.org/yt_analysis/yt/commits/d533d67ea947/
Changeset: d533d67ea947
Branch: yt-3.0
User: brittonsmith
Date: 2014-07-29 22:35:23
Summary: Fixing another docstring.
Affected #: 1 file
diff -r ab37412742679445ee6baf74c5faed157890c23e -r d533d67ea9478350b150d4018c6271c3a2f7b465 yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
--- a/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
+++ b/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
@@ -268,10 +268,10 @@
Examples
--------
- mt = EnzoFOFMergerTree() # by default it grabs every DD in FOF dir
- mt.build_tree(0) # Create tree for halo 0
- mt.print_tree()
- mt.write_dot()
+ >>> mt = EnzoFOFMergerTree() # by default it grabs every DD in FOF dir
+ >>> mt.build_tree(0) # Create tree for halo 0
+ >>> mt.print_tree()
+ >>> mt.write_dot()
See Also
--------
https://bitbucket.org/yt_analysis/yt/commits/9cfc115c3d00/
Changeset: 9cfc115c3d00
Branch: yt-3.0
User: ngoldbaum
Date: 2014-07-29 22:42:49
Summary: Merged in brittonsmith/yt/yt-3.0 (pull request #1081)
Halo cleanup.
Affected #: 11 files
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 doc/source/reference/api/api.rst
--- a/doc/source/reference/api/api.rst
+++ b/doc/source/reference/api/api.rst
@@ -279,6 +279,15 @@
.. autosummary::
:toctree: generated/
+ ~yt.frontends.halo_catalogs.halo_catalog.data_structures.HaloCatalogHDF5File
+ ~yt.frontends.halo_catalogs.halo_catalog.data_structures.HaloCatalogDataset
+ ~yt.frontends.halo_catalogs.halo_catalog.fields.HaloCatalogFieldInfo
+ ~yt.frontends.halo_catalogs.halo_catalog.io.IOHandlerHaloCatalogHDF5
+ ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindParticleIndex
+ ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindHDF5File
+ ~yt.frontends.halo_catalogs.owls_subfind.data_structures.OWLSSubfindDataset
+ ~yt.frontends.halo_catalogs.owls_subfind.fields.OWLSSubfindFieldInfo
+ ~yt.frontends.halo_catalogs.owls_subfind.io.IOHandlerOWLSSubfindHDF5
~yt.frontends.halo_catalogs.rockstar.data_structures.RockstarBinaryFile
~yt.frontends.halo_catalogs.rockstar.data_structures.RockstarDataset
~yt.frontends.halo_catalogs.rockstar.fields.RockstarFieldInfo
@@ -394,20 +403,45 @@
~yt.data_objects.profiles.Profile3D
~yt.data_objects.profiles.create_profile
-Halo Finding and Particle Functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Halo Analysis
+^^^^^^^^^^^^^
-Halo finding can be executed using these types. Here we list the main halo
-finders as well as a few other supplemental objects.
+The ``HaloCatalog`` object is the primary means for performing custom analysis
+on cosmological halos. It is also the primary interface for halo finding.
-.. rubric:: Halo Finders
+.. autosummary::
+ :toctree: generated/
+
+ ~yt.analysis_modules.halo_analysis.halo_catalog.HaloCatalog
+ ~yt.analysis_modules.halo_analysis.halo_finding_methods.HaloFindingMethod
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.HaloCallback
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.halo_sphere
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.sphere_field_max_recenter
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.sphere_bulk_velocity
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.profile
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.save_profiles
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.load_profiles
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.virial_quantities
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.phase_plot
+ ~yt.analysis_modules.halo_analysis.halo_callbacks.delete_attribute
+ ~yt.analysis_modules.halo_analysis.halo_filters.HaloFilter
+ ~yt.analysis_modules.halo_analysis.halo_filters.quantity_value
+ ~yt.analysis_modules.halo_analysis.halo_filters.not_subhalo
+ ~yt.analysis_modules.halo_analysis.halo_quantities.HaloQuantity
+ ~yt.analysis_modules.halo_analysis.halo_quantities.center_of_mass
+ ~yt.analysis_modules.halo_analysis.halo_quantities.bulk_velocity
+
+Halo Finding
+^^^^^^^^^^^^
+
+These provide direct access to the halo finders. However, it is strongly recommended
+to use the ``HaloCatalog``.
.. autosummary::
:toctree: generated/
~yt.analysis_modules.halo_finding.halo_objects.FOFHaloFinder
~yt.analysis_modules.halo_finding.halo_objects.HOPHaloFinder
- ~yt.analysis_modules.halo_finding.halo_objects.parallelHF
~yt.analysis_modules.halo_finding.rockstar.rockstar.RockstarHaloFinder
Two Point Functions
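A minimal sketch of the recommended HaloCatalog-driven workflow described in the api docs above, assuming the yt-3.0 halo_analysis interface (the dataset path and mass threshold are placeholders):

    import yt
    from yt.analysis_modules.halo_analysis.api import HaloCatalog

    data_ds = yt.load("DD0064/DD0064")                  # placeholder simulation output
    hc = HaloCatalog(data_ds=data_ds, finder_method="fof")
    hc.add_filter("quantity_value", "particle_mass", ">", 1e13, "Msun")
    hc.create()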
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
--- /dev/null
+++ b/yt/analysis_modules/halo_analysis/enzofof_merger_tree.py
@@ -0,0 +1,801 @@
+"""
+A very simple, purely-serial, merger tree script that knows how to parse FOF
+catalogs, either output by Enzo or output by yt's FOF halo finder, and then
+compare parent/child relationships.
+
+
+
+"""
+
+#-----------------------------------------------------------------------------
+# Copyright (c) 2013, yt Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+# plot_halo_evolution() gives a good full example of how to use the framework
+
+# First pass at a simplified merger tree
+#
+# Basic outline:
+#
+# 1. Halo find inline, obtaining particle catalogs
+# 2. Load dataset at time t
+# 3. Load dataset at time t+1
+# 4. Parse catalogs for t and t+1
+# 5. Place halos for t+1 in kD-tree
+# 6. For every halo in t, execute ball-query with some linking length
+# 7. For every halo in ball-query result, execute numpy's intersect1d on
+# particle IDs
+# 8. Parentage is described by a fraction of particles that pass from one to
+ # the other; we have both descendant fractions and ancestry fractions.
+
+
+import numpy as np
+import h5py
+import time
+import pdb
+import cPickle
+import glob
+
+from yt.funcs import *
+from yt.extern.pykdtree import KDTree
+import yt.extern.pydot as pydot
+
+# We don't currently use this, but we may again find a use for it in the
+# future.
+class MaxLengthDict(dict):
+ def __init__(self, *args, **kwargs):
+ dict.__init__(self, *args, **kwargs)
+ self.order = [None] * 50
+
+ def __setitem__(self, key, val):
+ if key not in self.order:
+ to_remove = self.order.pop(0)
+ self.pop(to_remove, None)
+ self.order.append(key)
+ dict.__setitem__(self, key, val)
+
+ def __getitem__(self, key):
+ if key in self.order:
+ self.order.pop(self.order.index(key))
+ self.order.append(key)
+ return dict.__getitem__(self, key)
+
+ def __delitem__(self, key):
+ dict.__delitem__(self, key)
+ self.order.pop(self.order.index(key))
+ self.order.insert(0, None)
+
+class HaloCatalog(object):
+ r"""A catalog of halos, parsed from EnzoFOF outputs.
+
+ This class will read in catalogs output by the Enzo FOF halo finder and
+ make available their positions, radii, etc. Enzo FOF was provided
+ starting with 2.0, and can be run either inline (with the correct
+ options) or as a postprocessing step using the `-F` command line
+ option. This class is mostly useful when calculating a merger tree,
+ and when the particle IDs for members of a given halo are output as
+ well.
+
+ Parameters
+ ----------
+ output_id : int
+ This is the integer output id of the halo catalog to parse and
+ load.
+ cache : bool
+ Should we store, in between accesses, the particle IDs? If set to
+ true, the correct particle files must exist.
+ external_FOF : bool, optional
+ Are we building a tree from outputs generated by an
+ external FOF program, or an FOF internal to yt?
+ FOF_directory : str, optional
+ Directory where FOF files are located
+ """
+ cache = None
+ def __init__(self, output_id, cache = True, external_FOF=True, FOF_directory="FOF"):
+ self.output_id = output_id
+ self.external_FOF = external_FOF
+ self.redshift = 0.0
+ self.FOF_directory = FOF_directory
+ self.particle_file = h5py.File("%s/particles_%05i.h5" % \
+ (FOF_directory, output_id), "r")
+ if self.external_FOF:
+ self.parse_halo_catalog_external()
+ else:
+ self.parse_halo_catalog_internal()
+ if cache: self.cache = dict()#MaxLengthDict()
+
+ def __del__(self):
+ self.particle_file.close()
+
+ def parse_halo_catalog_external(self):
+ hp = []
+ for line in open("%s/groups_%05i.dat" % \
+ (self.FOF_directory, self.output_id)):
+ if line.strip() == "": continue # empty
+ if line.startswith("# Red"):
+ self.redshift = float(line.split("=")[1])
+ if line[0] == "#": continue # comment
+ if line[0] == "d": continue # datavar
+ x,y,z = [float(f) for f in line.split(None, 3)[:-1]]
+ hp.append([x,y,z])
+ if hp != []:
+ self.halo_positions = np.array(hp)
+ self.halo_kdtree = KDTree(self.halo_positions)
+ else:
+ self.halo_positions = None
+ self.halo_kdtree = None
+ return hp
+
+ def parse_halo_catalog_internal(self):
+ """
+ This parser works on the files output directly out of yt's internal
+ halo_finder. The parse_halo_catalog_external works with an
+ external version of FOF.
+
+ Examples
+ --------
+ >>> ds = load("DD0000/DD0000")
+ >>> halo_list = FOFHaloFinder(ds)
+ >>> halo_list.write_out("FOF/groups_00000.txt")
+ >>> halos_COM = parse_halo_catalog_internal()
+ """
+ hp = []
+ for line in open("%s/groups_%05i.txt" % \
+ (self.FOF_directory, self.output_id)):
+ if line.startswith("# RED"):
+ self.redshift = float(line.split("=")[1])
+ continue
+ if line.strip() == "": continue # empty
+ if line[0] == "#": continue # comment
+ x,y,z = [float(f) for f in line.split()[7:10]] # COM x,y,z
+ hp.append([x,y,z])
+ if hp != []:
+ self.halo_positions = np.array(hp)
+ self.halo_kdtree = KDTree(self.halo_positions)
+ else:
+ self.halo_positions = None
+ self.halo_kdtree = None
+ return hp
+
+ def read_particle_ids(self, halo_id):
+ if self.cache is not None:
+ if halo_id not in self.cache:
+ if self.external_FOF:
+ self.cache[halo_id] = \
+ self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
+ else:
+ self.cache[halo_id] = \
+ self.particle_file["/Halo%08i/particle_index" % halo_id][:]
+ ids = self.cache[halo_id]
+ else:
+ if self.external_FOF:
+ ids = self.particle_file["/Halo%08i/Particle ID" % halo_id][:]
+ else:
+ ids = self.particle_file["/Halo%08i/particle_index" % halo_id][:]
+ return HaloParticleList(halo_id, self.halo_positions[halo_id,:], ids)
+
+ def calculate_parentage_fractions(self, other_catalog, radius = 0.10):
+ parentage_fractions = {}
+ if self.halo_positions == None or other_catalog.halo_positions == None:
+ return parentage_fractions
+ mylog.debug("Ball-tree query with radius %0.3e", radius)
+ all_nearest = self.halo_kdtree.query_ball_tree(
+ other_catalog.halo_kdtree, radius)
+ pbar = get_pbar("Halo Mergers", self.halo_positions.shape[0])
+ for hid1, nearest in enumerate(all_nearest):
+ pbar.update(hid1)
+ parentage_fractions[hid1] = {}
+ HPL1 = self.read_particle_ids(hid1)
+ for hid2 in sorted(nearest):
+ HPL2 = other_catalog.read_particle_ids(hid2)
+ p1, p2 = HPL1.find_relative_parentage(HPL2)
+ parentage_fractions[hid1][hid2] = (p1, p2, HPL2.number_of_particles)
+ parentage_fractions[hid1]["NumberOfParticles"] = HPL1.number_of_particles
+ pbar.finish()
+ return parentage_fractions
+
+class HaloParticleList(object):
+ def __init__(self, halo_id, position, particle_ids):
+ self.halo_id = halo_id
+ self.position = np.array(position)
+ self.particle_ids = particle_ids
+ self.number_of_particles = particle_ids.size
+
+ def find_nearest(self, other_tree, radius = 0.10):
+ return other_tree.query_ball_point(self.position, radius)
+
+ def find_relative_parentage(self, child):
+ # Return two values: percent this halo gave to the other, and percent
+ # of the other that comes from this halo
+ overlap = np.intersect1d(self.particle_ids, child.particle_ids).size
+ of_child_from_me = float(overlap)/child.particle_ids.size
+ of_mine_from_me = float(overlap)/self.particle_ids.size
+ return of_child_from_me, of_mine_from_me
+
+class EnzoFOFMergerBranch(object):
+ def __init__(self, tree, output_num, halo_id, max_children,
+ min_relation=0.25):
+ self.output_num = output_num
+ self.halo_id = halo_id
+ self.npart = tree.relationships[output_num][halo_id]["NumberOfParticles"]
+ self.children = []
+ self.progenitor = -1
+ max_relationship = 0.0
+ halo_count = 0
+ sorted_keys = sorted(tree.relationships[output_num][halo_id].keys())
+ for k in sorted_keys:
+ if not str(k).isdigit(): continue
+ v = tree.relationships[output_num][halo_id][k]
+ if v[1] > min_relation and halo_count < max_children:
+ halo_count += 1
+ self.children.append((k,v[1],v[2]))
+ if v[1] > max_relationship:
+ self.progenitor = k
+ max_relationship = v[1]
+
+class EnzoFOFMergerTree(object):
+ r"""Calculates the parentage relationships for halos for a series of
+ outputs, using the framework provided in enzofof_merger_tree.
+
+ Parameters
+ ----------
+ zrange : tuple
+ This is the redshift range (min, max) to calculate the
+ merger tree. E.g. (0, 2) for z=2 to z=0
+ cycle_range : tuple, optional
+ This is the cycle number range (min, max) to calculate the
+ merger tree. If both zrange and cycle_range are given,
+ zrange is ignored.
+ output : bool, optional
+ If True, both .cpkl and .txt files containing the parentage
+ relationships will be written.
+ load_saved : bool, optional
+ Flag to load previously saved parental relationships
+ save_filename : str, optional
+ Filename to save parental relationships
+ external_FOF : bool, optional
+ Are we building a tree from outputs generated by an
+ external FOF program, or an FOF internal to yt?
+ FOF_directory : str, optional
+ Directory where FOF files are located. Note that the files must be
+ named groups_DDDDD.txt for internal yt outputs and groups_DDDDD.dat
+ for external FOF outputs, where DDDDD are digits representing the
+ equivalent cycle number, e.g. groups_00000.txt.
+
+ Examples
+ --------
+ >>> mt = EnzoFOFMergerTree() # by default it grabs every DD in FOF dir
+ >>> mt.build_tree(0) # Create tree for halo 0
+ >>> mt.print_tree()
+ >>> mt.write_dot()
+
+ See Also
+ --------
+ plot_halo_evolution()
+ """
+ def __init__(self, zrange=None, cycle_range=None, output=False,
+ load_saved=False, save_filename="merger_tree.cpkl",
+ external_FOF=True, FOF_directory="FOF"):
+
+ self.relationships = {}
+ self.redshifts = {}
+ self.external_FOF = external_FOF
+ self.FOF_directory = FOF_directory
+ if load_saved:
+ self.load_tree("%s/%s" % (self.FOF_directory, save_filename))
+ # make merger tree work within specified cycle/z limits
+ # on preloaded halos
+ if zrange is not None:
+ self.select_redshifts(zrange)
+ if cycle_range is not None:
+ self.select_cycles(cycle_range)
+ else:
+ self.find_outputs(zrange, cycle_range, output)
+ self.run_merger_tree(output)
+ self.save_tree("%s/%s" % (self.FOF_directory, save_filename))
+
+ def select_cycles(self, cycle_range):
+ """
+ Takes an existing tree and pares it to only include a subset of
+ cycles. Useful in paring a loaded tree.
+ """
+ # N.B. Does not delete info from self.relationships to save space
+ # just removes it from redshift dict for indexing
+ for cycle in self.redshifts.keys():
+ if cycle <= cycle_range[0] and cycle >= cycle_range[1]:
+ del self.redshifts[cycle]
+
+ def select_redshifts(self, zrange):
+ """
+ Takes an existing tree and pares it to only include a subset of
+ redshifts. Useful in paring a loaded tree.
+ """
+ # N.B. Does not delete info from self.relationships to save space
+ # just removes it from redshift dict for indexing
+ for redshift in self.redshifts.values():
+ if redshift <= zrange[0] and redshift >= zrange[1]:
+ # some reverse lookup magic--assumes unique cycle/z pairs
+ cycle = [key for key,value in mt.redshifts.items() \
+ if value == redshift][0]
+ del self.redshifts[cycle]
+
+ def save_tree(self, filename):
+ cPickle.dump((self.redshifts, self.relationships),
+ open(filename, "wb"))
+
+ def load_tree(self, filename):
+ self.redshifts, self.relationships = \
+ cPickle.load(open(filename, "rb"))
+
+ def clear_data(self):
+ r"""Deletes previous merger tree, but keeps parentage
+ relationships.
+ """
+ del self.levels
+
+ def find_outputs(self, zrange, cycle_range, output):
+ self.numbers = []
+ if self.external_FOF:
+ filenames = "%s/groups_*.dat" % (self.FOF_directory)
+ files = glob.glob(filenames)
+ else:
+ filenames = "%s/groups_*.txt" % (self.FOF_directory)
+ files = glob.glob(filenames)
+ # If using redshift range, load redshifts only
+ for f in files:
+ num = int(f[-9:-4])
+ if zrange is not None:
+ HC = HaloCatalog(num, external_FOF=self.external_FOF, \
+ FOF_directory=self.FOF_directory)
+ # Allow for some epsilon
+ diff1 = (HC.redshift - zrange[0]) / zrange[0]
+ diff2 = (HC.redshift - zrange[1]) / zrange[1]
+ if diff1 >= -1e-3 and diff2 <= 1e-3:
+ self.numbers.append(num)
+ del HC
+ elif cycle_range is not None:
+ if num >= cycle_range[0] and num <= cycle_range[1]:
+ self.numbers.append(num)
+ else:
+ self.numbers.append(num)
+ self.numbers.sort()
+
+ def run_merger_tree(self, output):
+ # Run merger tree for all outputs, starting with the last output
+ for i in range(len(self.numbers)-1, 0, -1):
+ if output:
+ output = "%s/tree-%5.5d-%5.5d" % \
+ (self.FOF_directory, self.numbers[i], self.numbers[i-1])
+ else:
+ output = None
+ z0, z1, fr = find_halo_relationships(self.numbers[i], \
+ self.numbers[i-1], \
+ output_basename=output, \
+ external_FOF=self.external_FOF,
+ FOF_directory=self.FOF_directory)
+ self.relationships[self.numbers[i]] = fr
+ self.redshifts[self.numbers[i]] = z0
+ # Fill in last redshift
+ self.redshifts[self.numbers[0]] = z1
+
+ def build_tree(self, halonum, min_particles=0, max_children=1e20):
+ r"""Builds a merger tree, starting at the last output.
+
+ Parameters
+ ----------
+ halonum : int
+ Halo number in the last output to analyze.
+ min_particles : int, optional
+ Minimum number of particles of halos in tree.
+ max_children : int, optional
+ Maximum number of child halos each leaf can have.
+ """
+ self.halonum = halonum
+ self.max_children = max_children
+ self.output_numbers = sorted(self.relationships, reverse=True)
+ self.levels = {}
+ trunk = self.output_numbers[0]
+ self.levels[trunk] = [EnzoFOFMergerBranch(self, trunk, halonum,
+ max_children)]
+ self.generate_tree(min_particles, max_children)
+
+ def filter_small_halos(self, lvl, min_particles):
+ # Filter out children with less than min_particles
+ for h in self.levels[lvl]:
+ fil = []
+ for c in h.children:
+ if c[2] > min_particles: # c[2] = npart
+ fil.append(c)
+ h.children = fil
+
+ def generate_tree(self, min_particles, max_children):
+ self.filter_small_halos(self.output_numbers[0], min_particles)
+ for i in range(1,len(self.output_numbers)):
+ prev = self.output_numbers[i-1]
+ this = self.output_numbers[i]
+ self.levels[this] = []
+ this_halos = [] # To check for duplicates
+ for h in self.levels[prev]:
+ for c in h.children:
+ if c[0] in this_halos: continue
+ if self.relationships[this] == {}: continue
+ branch = EnzoFOFMergerBranch(self, this, c[0],
+ max_children)
+ self.levels[this].append(branch)
+ this_halos.append(c[0])
+ self.filter_small_halos(this, min_particles)
+
+ def get_massive_progenitors(self, halonum, min_relation=0.25):
+ r"""Returns a list of the most massive progenitor halos.
+
+ This routine walks down the tree, following the most massive
+ progenitor on each node.
+
+ Parameters
+ ----------
+ halonum : int
+ Halo number at the last output to trace.
+
+ Returns
+ -------
+ output : dict
+ Dictionary of redshifts, cycle numbers, and halo numbers
+ of the most massive progenitor. keys = {redshift, cycle,
+ halonum}
+ """
+ output = {"redshift": [], "cycle": [], "halonum": []}
+ # First (lowest redshift) node in tree
+ halo0 = halonum
+ for cycle in sorted(self.numbers, reverse=True):
+ if cycle not in self.relationships: break
+ if halo0 not in self.relationships[cycle]: break
+ node = self.relationships[cycle][halo0]
+ output["redshift"].append(self.redshifts[cycle])
+ output["cycle"].append(cycle)
+ output["halonum"].append(halo0)
+ # Find progenitor
+ max_rel = 0.0
+ for k,v in node.items():
+ if not str(k).isdigit(): continue
+ if v[1] > max_rel and v[1] > min_relation:
+ halo0 = k
+ max_rel = v[1]
+ return output
+
+ def print_tree(self):
+ r"""Prints the merger tree to stdout.
+ """
+ for lvl in sorted(self.levels, reverse=True):
+ if lvl not in self.redshifts: continue
+ print "========== Cycle %5.5d (z=%f) ==========" % \
+ (lvl, self.redshifts[lvl])
+ for br in self.levels[lvl]:
+ print "Parent halo = %d" % br.halo_id
+ print "--> Most massive progenitor == Halo %d" % \
+ (br.progenitor)
+ for i,c in enumerate(br.children):
+ if i > self.max_children: break
+ print "--> Halo %8.8d :: fraction = %g" % (c[0], c[1])
+
+ def save_halo_evolution(self, filename):
+ """
+ Saves to an HDF5 file the relevant details about a halo over the
+ course of its evolution, following the most massive progenitor
+ (the one that gave it the bulk of its particles). It stores info
+ from the FOF groups file: location, mass, id, etc.
+ """
+ f = h5py.File("%s/%s" % (self.FOF_directory, filename), 'a')
+ cycle_fin = self.redshifts.keys()[-1]
+ halo_id = self.levels[cycle_fin][0].halo_id
+ halo = "halo%05d" % halo_id
+ if halo in f:
+ del f["halo%05d" % halo_id]
+ g = f.create_group("halo%05d" % halo_id)
+ size = len(self.redshifts)
+ cycle = np.zeros(size)
+ redshift = np.zeros(size)
+ halo_id = np.zeros(size)
+ fraction = np.zeros(size)
+ mass = np.zeros(size)
+ densest_point = np.zeros((3,size))
+ COM = np.zeros((6,size))
+ fraction[0] = 1.
+
+ for i, lvl in enumerate(sorted(self.levels, reverse=True)):
+ if len(self.levels[lvl]) == 0: # lineage for this halo ends
+ cycle = cycle[:i] # so truncate arrays, and break
+ redshift = redshift[:i] # Not big enough.
+ halo_id = halo_id[:i]
+ fraction = fraction[:i]
+ mass = mass[:i]
+ densest_point = densest_point[:,:i]
+ COM = COM[:,:i]
+ break
+ if lvl not in self.redshifts: continue
+ mylog.info("========== Cycle %5.5d (z=%f) ==========" % \
+ (lvl, self.redshifts[lvl]))
+ cycle[i] = lvl
+ redshift[i] = self.redshifts[lvl]
+
+ br = self.levels[lvl][0]
+ mylog.info("Parent halo = %d" % br.halo_id)
+ mylog.info("-> Most massive progenitor == Halo %d" % (br.progenitor))
+ halo_id[i] = br.halo_id
+
+ if len(br.children) == 0: # lineage for this halo ends
+ cycle = cycle[:i+1] # (no children)
+ redshift = redshift[:i+1] # so truncate arrays, and break
+ halo_id = halo_id[:i+1]
+ fraction = fraction[:i+1]
+ mass = mass[:i+1]
+ densest_point = densest_point[:,:i+1]
+ COM = COM[:,:i+1]
+ break
+
+ if i < size-1:
+ fraction[i+1] = br.children[0][1]
+
+ # open up FOF file to parse for details
+ filename = "%s/groups_%05d.txt" % (self.FOF_directory, lvl)
+ mass[i], densest_point[:,i], COM[:,i] = \
+ grab_FOF_halo_info_internal(filename, br.halo_id)
+
+ # save the arrays in the hdf5 file
+ g.create_dataset("cycle", data=cycle)
+ g.create_dataset("redshift", data=redshift)
+ g.create_dataset("halo_id", data=halo_id)
+ g.create_dataset("fraction", data=fraction)
+ g.create_dataset("mass", data=mass)
+ g.create_dataset("densest_point", data=densest_point)
+ g.create_dataset("COM", data=COM)
+ f.close()
+
+ def write_dot(self, filename=None):
+ r"""Writes merger tree to a GraphViz or image file.
+
+ Parameters
+ ----------
+ filename : str, optional
+ Filename for the GraphViz output. The default is
+ tree_halo%05i.gv, a text file in the GraphViz format.
+ If filename ends in an image suffix (e.g. "MergerTree.png"),
+ the output is rendered in that image format by calling
+ GraphViz automatically. See the GraphViz documentation
+ (e.g. "dot -v") for a list of available output formats.
+ """
+ if filename is None:
+ filename = "%s/tree_halo%5.5d.gv" % \
+ (self.FOF_directory, self.halonum)
+ # Create the pydot graph object.
+ self.graph = pydot.Dot('galaxy', graph_type='digraph')
+ self.halo_shape = "rect"
+ self.z_shape = "plaintext"
+ # Subgraphs to align levels
+ self.subgs = {}
+ for num in self.numbers:
+ self.subgs[num] = pydot.Subgraph('', rank = 'same')
+ self.graph.add_subgraph(self.subgs[num])
+ sorted_lvl = sorted(self.levels, reverse=True)
+ for ii,lvl in enumerate(sorted_lvl):
+ # Since we get the cycle number from the key, it won't
+ # exist for the last level, i.e. children of last level.
+ # Get it from self.numbers.
+ if ii < len(sorted_lvl)-1:
+ next_lvl = sorted_lvl[ii+1]
+ else:
+ next_lvl = self.numbers[0]
+ for br in self.levels[lvl]:
+ for c in br.children:
+ color = "red" if c[0] == br.progenitor else "black"
+ self.graph.add_edge(pydot.Edge("C%d_H%d" %(lvl, br.halo_id),
+ "C%d_H%d" % (next_lvl, c[0]), color=color))
+ #line = " C%d_H%d -> C%d_H%d [color=%s];\n" % \
+ # (lvl, br.halo_id, next_lvl, c[0], color)
+
+ #fp.write(line)
+ last_level = (ii,lvl)
+ for ii,lvl in enumerate(sorted_lvl):
+ npart_max = 0
+ for br in self.levels[lvl]:
+ if br.npart > npart_max: npart_max = br.npart
+ for br in self.levels[lvl]:
+ halo_str = "C%d_H%d" % (lvl, br.halo_id)
+ style = "filled" if br.npart == npart_max else "solid"
+ self.graph.add_node(pydot.Node(halo_str,
+ label = "Halo %d\\n%d particles" % (br.halo_id, br.npart),
+ style = style, shape = self.halo_shape))
+ # Add this node to the correct level subgraph.
+ self.subgs[lvl].add_node(pydot.Node(halo_str))
+ for lvl in self.numbers:
+ # Don't add the z if there are no halos already in the subgraph.
+ if len(self.subgs[lvl].get_node_list()) == 0: continue
+ self.subgs[lvl].add_node(pydot.Node("%1.5e" % self.redshifts[lvl],
+ shape = self.z_shape, label = "z=%0.3f" % self.redshifts[lvl]))
+ # Based on the suffix of the file name, write out the result to a file.
+ suffix = filename.split(".")[-1]
+ if suffix == "gv": suffix = "raw"
+ mylog.info("Writing %s format %s to disk." % (suffix, filename))
+ self.graph.write("%s" % filename, format=suffix)
+
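[Editorial note: a hypothetical usage sketch, not part of the commit, of the GraphViz
output described above. It assumes FOF catalogs have already been written as in the
example in the plot_halo_evolution docstring further down; the halo number and file
names are illustrative only.]

# Build a merger tree for one halo and dump it both as raw GraphViz text
# and as a rendered image (the latter requires GraphViz to be installed).
mt = EnzoFOFMergerTree(external_FOF=False)
mt.build_tree(0)                        # follow halo 0 back through time
mt.write_dot()                          # default: FOF/tree_halo00000.gv
mt.write_dot("FOF/tree_halo00000.png")  # suffix picks the GraphViz format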
+def find_halo_relationships(output1_id, output2_id, output_basename = None,
+ radius = 0.10, external_FOF=True,
+ FOF_directory='FOF'):
+ r"""Calculate the parentage and child relationships between two EnzoFOF
+ halo catalogs.
+
+ This function performs a very simple merger tree calculation between two
+ sets of halos. For every halo in the second halo catalog, it searches the
+ first halo catalog for parents by matching particle IDs. The particle IDs
+ from the child halos are identified in potential parents, and then both
+ percent-of-parent and percent-to-child values are recorded.
+
+ Note that with external_FOF=True this works with catalogs constructed
+ by Enzo's FOF halo finder, whereas with external_FOF=False it works
+ with catalogs constructed by yt's FOF halo finder.
+
+ Parameters
+ ----------
+ output1_id : int
+ This is the integer output id of the (first) halo catalog to parse and
+ load.
+ output2_id : int
+ This is the integer output id of the (second) halo catalog to parse and
+ load.
+ output_basename : string
+ If provided, both .cpkl and .txt files containing the parentage
+ relationships will be output.
+ radius : float, defaults to 0.10
+ In absolute units, the radius to examine when guessing possible
+ parent/child relationships. If this value is too small, you will miss
+ possible relationships.
+ external_FOF : bool, optional
+ Whether the catalogs were written by Enzo's FOF halo finder (True)
+ or by yt (False). Default: True.
+ FOF_directory : str, optional
+ Directory where the FOF files are located.
+
+ Returns
+ -------
+ pfrac : dict
+ This is a dict of dicts. The first key is the parent halo id, the
+ second is the child halo id. The values are the percent contributed
+ from parent to child and the percent of a child that came from the
+ parent.
+ """
+ mylog.info("Parsing Halo Catalog %04i", output1_id)
+ HC1 = HaloCatalog(output1_id, False, external_FOF=external_FOF, \
+ FOF_directory=FOF_directory)
+ mylog.info("Parsing Halo Catalog %04i", output2_id)
+ HC2 = HaloCatalog(output2_id, True, external_FOF=external_FOF, \
+ FOF_directory=FOF_directory)
+ mylog.info("Calculating fractions")
+ pfrac = HC1.calculate_parentage_fractions(HC2)
+
+ if output_basename is not None and pfrac != {}:
+ f = open("%s.txt" % (output_basename), "w")
+ for hid1 in sorted(pfrac):
+ for hid2 in sorted(pfrac[hid1]):
+ if not str(hid2).isdigit(): continue
+ p1, p2, npart = pfrac[hid1][hid2]
+ if p1 == 0.0: continue
+ f.write( "Halo %s (%s) contributed %0.3e of its particles to %s (%s), which makes up %0.3e of that halo\n" % (
+ hid1, output1_id, p2, hid2, output2_id, p1))
+ f.close()
+
+ cPickle.dump(pfrac, open("%s.cpkl" % (output_basename), "wb"))
+
+ return HC1.redshift, HC2.redshift, pfrac
+
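[Editorial note: a small sketch, not part of the commit, to make the returned structure
concrete. The output ids 100 and 101 and the basename are placeholders; the value layout
follows the docstring and the .txt writer above.]

# pfrac[parent_id][child_id] == (p1, p2, npart): p2 is the fraction of the
# parent's particles handed to the child, p1 the fraction of the child made
# up of those particles (see the text writer above).
z1, z2, pfrac = find_halo_relationships(100, 101, output_basename="links")
for parent_id in sorted(pfrac):
    for child_id in sorted(pfrac[parent_id]):
        if not str(child_id).isdigit():
            continue  # skip bookkeeping entries, as the writer above does
        p1, p2, npart = pfrac[parent_id][child_id]
        if p1 == 0.0:
            continue
        print("Halo %s gave %0.3e of its particles to halo %s, "
              "making up %0.3e of that halo" % (parent_id, p2, child_id, p1))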
+def grab_FOF_halo_info_internal(filename, halo_id):
+ """
+ Finds a specific halo's entry in the FOF group output file and
+ returns the relevant quantities to the caller.
+ """
+ # open up FOF file to parse for details
+ groups_file = open(filename, 'r')
+ for line in groups_file:
+ if line.startswith("#"): continue
+ if int(line.split()[0]) == halo_id:
+ ar = np.array(line.split()).astype('float64')
+ return ar[1], ar[4:7], ar[7:13] # mass, xyz_dens, xyzvxvyvz_COM
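[Editorial note: the slicing above implies, but the commit does not state, a fixed column
layout for the groups_%05d.txt files; the call below is hypothetical, with a placeholder
path and halo id.]

# Inferred column layout of a groups_%05d.txt row (an assumption from the
# slicing above): 0 = halo id, 1 = mass, 4-6 = densest point x/y/z,
# 7-12 = center-of-mass x/y/z/vx/vy/vz.
mass, xyz_dens, com = grab_FOF_halo_info_internal("FOF/groups_00030.txt", 0)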
+
+def plot_halo_evolution(filename, halo_id, x_quantity='cycle', y_quantity='mass',
+ x_log=False, y_log=True, FOF_directory='FOF'):
+ """
+ Once you have generated a file using the
+ EnzoFOFMergerTree.save_halo_evolution function, this is a simple way of
+ plotting the evolution in the quantities of that halo over its lifetime.
+
+ Parameters
+ ----------
+ filename : str
+ The filename to which you saved the hdf5 data from save_halo_evolution
+ halo_id : int
+ The halo in 'filename' that you want to follow
+ x_quantity, y_quantity : str, optional
+ The quantities to plot on the x and y axes.
+ Valid options are:
+
+ * cycle
+ * mass
+ * fraction
+ * halo_id
+ * redshift
+ * dense_x
+ * dense_y
+ * dense_z
+ * COM_x
+ * COM_y
+ * COM_z
+ * COM_vx
+ * COM_vy
+ * COM_vz
+
+ x_log, y_log : bool, optional
+ Whether to use a logarithmic scale for the x (y) axis.
+ FOF_directory : str, optional
+ Directory where the FOF files (and the HDF5 file) are located.
+
+ Examples
+ --------
+
+ >>> # generates mass history plots for the 20 most massive halos at t_fin.
+ >>> ts = DatasetSeries.from_filenames("DD????/DD????")
+ >>> # long step--must run FOF on each DD, but saves outputs for later use
+ >>> for ds in ts:
+ ... halo_list = FOFHaloFinder(ds)
+ ... i = int(ds.basename[2:])
+ ... halo_list.write_out("FOF/groups_%05i.txt" % i)
+ ... halo_list.write_particle_lists("FOF/particles_%05i" % i)
+ ...
+ >>> mt = EnzoFOFMergerTree(external_FOF=False)
+ >>> for i in range(20):
+ ... mt.build_tree(i)
+ ... mt.save_halo_evolution('halos.h5')
+ ...
+ >>> for i in range(20):
+ ... plot_halo_evolution('halos.h5', i)
+ """
+ import matplotlib.pyplot as plt
+ f = h5py.File("%s/%s" % (FOF_directory, filename), 'r')
+ basename = os.path.splitext(filename)[0]
+ halo = "halo%05d" % halo_id
+ basename = basename + "_" + halo
+ g = f[halo]
+ values = list(g)
+ index_dict = {'x' : 0, 'y' : 1, 'z' : 2, 'vx' : 3, 'vy' : 4, 'vz' : 5}
+ coords = {}
+ fields = {}
+ for i, quantity in enumerate((x_quantity, y_quantity)):
+ field = quantity
+ if quantity.startswith('COM'):
+ index = index_dict[quantity.split('_')[-1]]
+ quantity = ('COM')
+ if quantity.startswith('dense'):
+ index = index_dict[quantity.split('_')[-1]]
+ quantity = ('densest_point')
+ if quantity not in values:
+ exit('%s not in list of values in %s for halo %d' % \
+ (quantity, filename, halo_id))
+ if not field == quantity:
+ coords[i] = g[quantity][index,:]
+ else:
+ coords[i] = g[quantity]
+ if len(coords[i]) == 1:
+ # ("Only 1 value for Halo %d. Ignoring." % halo_id)
+ return
+ fields[i] = field
+
+ ax = plt.axes()
+ ax.plot(coords[0], coords[1])
+ ax.set_title(basename)
+ ax.set_xlabel(fields[0])
+ ax.set_ylabel(fields[1])
+ if x_log:
+ ax.set_xscale("log")
+ if y_log:
+ ax.set_yscale("log")
+ ofn = "%s/%s_%s_%s.png" % (FOF_directory, basename, fields[0], fields[1])
+ plt.savefig(ofn)
+ plt.clf()
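[Editorial note: a final hypothetical sketch, not part of the commit. The component
naming handled above maps COM_* and dense_* onto rows of the stored arrays via
index_dict, so a halo's center-of-mass track could be plotted as below; 'halos.h5'
and halo 0 reuse the names from the docstring example.]

# Plot the x-y track of halo 0's center of mass; 'halos.h5' is the file
# written by save_halo_evolution in the example above.
plot_halo_evolution('halos.h5', 0, x_quantity='COM_x', y_quantity='COM_y',
                    x_log=False, y_log=False)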
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_analysis/halo_filters.py
--- a/yt/analysis_modules/halo_analysis/halo_filters.py
+++ b/yt/analysis_modules/halo_analysis/halo_filters.py
@@ -66,7 +66,7 @@
add_filter("quantity_value", quantity_value)
-def _not_subhalo(halo, field_type="halos"):
+def not_subhalo(halo, field_type="halos"):
"""
Only return true if this halo is not a subhalo.
@@ -76,11 +76,11 @@
if not hasattr(halo.halo_catalog, "parent_dict"):
halo.halo_catalog.parent_dict = \
- create_parent_dict(halo.halo_catalog.data_source, ptype=field_type)
+ _create_parent_dict(halo.halo_catalog.data_source, ptype=field_type)
return halo.halo_catalog.parent_dict[int(halo.quantities["particle_identifier"])] == -1
-add_filter("not_subhalo", _not_subhalo)
+add_filter("not_subhalo", not_subhalo)
-def create_parent_dict(data_source, ptype="halos"):
+def _create_parent_dict(data_source, ptype="halos"):
"""
Create a dictionary of halo parents to allow for filtering of subhalos.
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_finding/api.py
--- a/yt/analysis_modules/halo_finding/api.py
+++ b/yt/analysis_modules/halo_finding/api.py
@@ -16,16 +16,13 @@
from .halo_objects import \
Halo, \
HOPHalo, \
- parallelHOPHalo, \
LoadedHalo, \
FOFHalo, \
HaloList, \
HOPHaloList, \
FOFHaloList, \
- parallelHOPHaloList, \
LoadedHaloList, \
GenericHaloFinder, \
- parallelHF, \
HOPHaloFinder, \
FOFHaloFinder, \
HaloFinder, \
diff -r 76f1cf5a7f78987525c183db3d64a0096a5a21f3 -r 9cfc115c3d00159393789c8286df60bac83db615 yt/analysis_modules/halo_finding/halo_objects.py
--- a/yt/analysis_modules/halo_finding/halo_objects.py
+++ b/yt/analysis_modules/halo_finding/halo_objects.py
@@ -26,12 +26,14 @@
from collections import defaultdict
from yt.extern.six import add_metaclass
-from yt.funcs import *
-
from yt.config import ytcfg
+from yt.funcs import mylog
from yt.utilities.performance_counters import \
- yt_counters, time_function
-from yt.utilities.math_utils import periodic_dist, get_rotation_matrix
+ time_function, \
+ yt_counters
+from yt.utilities.math_utils import \
+ get_rotation_matrix, \
+ periodic_dist
from yt.utilities.physical_constants import \
mass_sun_cgs, \
TINY
@@ -40,11 +42,6 @@
from .hop.EnzoHop import RunHOP
from .fof.EnzoFOF import RunFOF
-try:
- from parallel_hop.parallel_hop_interface import \
- ParallelHOPHaloFinder
-except ImportError:
- mylog.debug("Parallel HOP not imported.")
from yt.utilities.parallel_tools.parallel_analysis_interface import \
ParallelDummy, \
@@ -447,7 +444,8 @@
Msun2g = mass_sun_cgs
rho_crit = rho_crit * ((1.0 + z) ** 3.0)
# Get some pertinent information about the halo.
- self.mass_bins = YTArray(np.zeros(self.bin_count + 1, dtype='float64'),'Msun')
+ self.mass_bins = self.ds.arr(np.zeros(self.bin_count + 1,
+ dtype='float64'),'Msun')
dist = np.empty(thissize, dtype='float64')
cen = self.center_of_mass()
mark = 0
@@ -694,113 +692,6 @@
pass
-class parallelHOPHalo(Halo, ParallelAnalysisInterface):
- dont_wrap = ["maximum_density", "maximum_density_location",
- "center_of_mass", "total_mass", "bulk_velocity", "maximum_radius",
- "get_size", "get_sphere", "write_particle_list", "__getitem__",
- "virial_info", "virial_bin", "virial_mass", "virial_radius",
- "rms_velocity"]
-
- def virial_info(self, bins=300):
- r"""Calculates the virial information for the halo. Generally, it is
- better to call virial_radius or virial_mass instead, which calls this
- function automatically.
- """
- # Skip if we've already calculated for this number of bins.
- if self.bin_count == bins and self.overdensity is not None:
- return None
- # Do this for all because all will use it.
- self.bin_count = bins
- period = self.data.ds.domain_right_edge - \
- self.data.ds.domain_left_edge
- self.mass_bins = np.zeros(self.bin_count + 1, dtype='float64')
- cen = self.center_of_mass()
- # Cosmology
- h = self.data.ds.hubble_constant
- Om_matter = self.data.ds.omega_matter
- z = self.data.ds.current_redshift
- rho_crit = rho_crit_g_cm3_h2 * h ** 2.0 * Om_matter # g cm^-3
- Msun2g = mass_sun_cgs
- rho_crit = rho_crit * ((1.0 + z) ** 3.0)
- # If I own some of this halo operate on the particles.
- if self.indices is not None:
- # Get some pertinent information about the halo.
- dist = np.empty(self.indices.size, dtype='float64')
- mark = 0
- # Find the distances to the particles.
- # I don't like this much, but I
- # can't see a way to eliminate a loop like this, either here or in
- # yt.math_utils.
- for pos in itertools.izip(self["particle_position_x"],
- self["particle_position_y"], self["particle_position_z"]):
- dist[mark] = periodic_dist(cen, pos, period)
- mark += 1
- dist_min, dist_max = min(dist), max(dist)
- # If I don't have this halo, make some dummy values.
- else:
- dist_min = max(period)
- dist_max = 0.0
- # In this parallel case, we're going to find the global dist extrema
- # and built identical bins on all tasks.
- dist_min = self.comm.mpi_allreduce(dist_min, op='min')
- dist_max = self.comm.mpi_allreduce(dist_max, op='max')
- # Set up the radial bins.
- # Multiply min and max to prevent issues with digitize below.
- self.radial_bins = np.logspace(math.log10(dist_min * .99 + TINY),
- math.log10(dist_max * 1.01 + 2 * TINY), num=self.bin_count + 1)
- self.radial_bins = ds.arr(self.radial_bins, 'code_length')
-
- if self.indices is not None and self.indices.size > 1:
- # Find out which bin each particle goes into, and add the particle
- # mass to that bin.
- inds = np.digitize(dist, self.radial_bins) - 1
- for index in np.unique(inds):
- self.mass_bins[index] += \
- np.sum(self["particle_mass"][inds == index]).in_units('Msun')
- # Now forward sum the masses in the bins.
- for i in xrange(self.bin_count):
- self.mass_bins[i + 1] += self.mass_bins[i]
- # Sum up the mass_bins globally
- self.mass_bins = self.comm.mpi_allreduce(self.mass_bins, op='sum')
- # Calculate the over densities in the bins.
- self.overdensity = self.mass_bins * Msun2g / \
- (4. / 3. * math.pi * rho_crit * \
- (self.radial_bins) ** 3.0)
-
- def _get_ellipsoid_parameters_basic(self):
- mylog.error("Ellipsoid calculation does not work for parallelHF halos." + \
- " Please save the halos using .dump(), and reload them using" + \
- " LoadHaloes() to use this function.")
- return None
-
- def get_ellipsoid_parameters(self):
- r"""Calculate the parameters that describe the ellipsoid of
- the particles that constitute the halo.
-
- Parameters
- ----------
- None
-
- Returns
- -------
- tuple : (cm, mag_A, mag_B, mag_C, e0_vector, tilt)
- The 6-tuple has in order:
- #. The center of mass as an array.
- #. mag_A as a float.
- #. mag_B as a float.
- #. mag_C as a float.
- #. e0_vector as an array.
- #. tilt as a float.
-
- Examples
- --------
- >>> params = halos[0].get_ellipsoid_parameters()
- """
- mylog.error("get_ellipsoid_parameters does not work for parallelHF halos." + \
- " Please save the halos using .dump(), and reload them using" + \
- " LoadHaloes() to use this function.")
- return None
-
class FOFHalo(Halo):
def maximum_density(self):
@@ -1565,254 +1456,6 @@
CoM = cen, max_radius = r, supp = temp_dict))
halo += 1
-
-class parallelHOPHaloList(HaloList, ParallelAnalysisInterface):
- """
- Run hop on *data_source* with a given density *threshold*. If
- *dm_only* is True (default), only run it on the dark matter particles, otherwise
- on all particles. Returns an iterable collection of *HopGroup* items.
- """
- _name = "parallelHOP"
- _halo_class = parallelHOPHalo
- _fields = ["particle_position_%s" % ax for ax in 'xyz'] + \
- ["particle_mass", "particle_index"]
-
- def __init__(self, data_source, padding, num_neighbors, bounds, total_mass,
- period, threshold=160.0, dm_only=True, rearrange=True, premerge=True,
- tree='F'):
- ParallelAnalysisInterface.__init__(self)
- self.threshold = threshold
- self.num_neighbors = num_neighbors
- self.bounds = bounds
- self.total_mass = total_mass
- self.rearrange = rearrange
- self.period = period
- self.old_period = period.copy()
- self.period = np.array([1.] * 3)
- self._data_source = data_source
- self.premerge = premerge
- self.tree = tree
- mylog.info("Initializing HOP")
- HaloList.__init__(self, data_source, dm_only)
-
- def _run_finder(self):
- yt_counters("Reading Data")
- # Test to make sure the particle IDs aren't suspicious.
- exit = False
- if (self.particle_fields["particle_index"] < 0).any():
- mylog.error("Negative values in particle_index field. Parallel HOP will fail.")
- exit = True
- if np.unique(self.particle_fields["particle_index"]).size != \
- self.particle_fields["particle_index"].size:
- mylog.error("Non-unique values in particle_index field. Parallel HOP will fail.")
- exit = True
-
- self.comm.mpi_exit_test(exit)
- # Try to do this in a memory conservative way.
- np.divide(self.particle_fields['particle_mass'].in_units('Msun'), self.total_mass,
- self.particle_fields['particle_mass'])
- np.divide(self.particle_fields["particle_position_x"],
- self.old_period[0], self.particle_fields["particle_position_x"])
- np.divide(self.particle_fields["particle_position_y"],
- self.old_period[1], self.particle_fields["particle_position_y"])
- np.divide(self.particle_fields["particle_position_z"],
- self.old_period[2], self.particle_fields["particle_position_z"])
- obj = ParallelHOPHaloFinder(self.period, self.padding,
- self.num_neighbors, self.bounds,
- self.particle_fields,
- self.threshold, rearrange=self.rearrange, premerge=self.premerge,
- tree=self.tree)
- self.densities, self.tags = obj.density, obj.chainID
- # I'm going to go ahead and delete self.densities because it's not
- # actually being used. I'm not going to remove it altogether because
- # it may be useful to someone someday.
- del self.densities
- self.group_count = obj.group_count
- self.group_sizes = obj.group_sizes
- if self.group_count == 0:
- mylog.info("There are no halos found.")
- return
- self.CoM = obj.CoM
- self.Tot_M = obj.Tot_M * self.total_mass
- self.max_dens_point = obj.max_dens_point
- self.max_radius = obj.max_radius
- for dd in range(3):
- self.CoM[:, dd] *= self.old_period[dd]
- self.max_dens_point[:, dd + 1] *= self.old_period[dd]
- # This is wrong, below, with uneven boundaries. We'll cross that bridge
- # when we get there.
- self.max_radius *= self.old_period[0]
- self.period = self.old_period.copy()
- # Precompute the bulk velocity in parallel.
- yt_counters("Precomp bulk vel.")
- self.bulk_vel = np.zeros((self.group_count, 3), dtype='float64')
- yt_counters("bulk vel. reading data")
- pm = obj.mass
- # Fix this back to un-normalized units.
- np.multiply(pm, self.total_mass, pm)
- xv = self._data_source["particle_velocity_x"][self._base_indices]
- yv = self._data_source["particle_velocity_y"][self._base_indices]
- zv = self._data_source["particle_velocity_z"][self._base_indices]
- yt_counters("bulk vel. reading data")
- yt_counters("bulk vel. computing")
- select = (self.tags >= 0)
- calc = len(np.where(select == True)[0])
- if calc:
- vel = np.empty((calc, 3), dtype='float64')
- ms = pm[select]
- vel[:, 0] = xv[select] * ms
- vel[:, 1] = yv[select] * ms
- vel[:, 2] = zv[select] * ms
- subchain = self.tags[select]
- sort = subchain.argsort()
- vel = vel[sort]
- sort_subchain = subchain[sort]
- uniq_subchain = np.unique(sort_subchain)
- diff_subchain = np.ediff1d(sort_subchain)
- marks = (diff_subchain > 0)
- marks = np.arange(calc)[marks] + 1
- marks = np.concatenate(([0], marks, [calc]))
- for i, u in enumerate(uniq_subchain):
- self.bulk_vel[u] = np.sum(vel[marks[i]:marks[i + 1]], axis=0)
- del vel, subchain, sort_subchain
- del diff_subchain
- # Bring it together, and divide by the previously computed total mass
- # of each halo.
- self.bulk_vel = self.comm.mpi_allreduce(self.bulk_vel, op='sum')
- for groupID in xrange(self.group_count):
- self.bulk_vel[groupID] = \
- self.bulk_vel[groupID] / self.Tot_M[groupID]
- yt_counters("bulk vel. computing")
- # Now calculate the RMS velocity of the groups in parallel, very
- # similarly to the bulk velocity and re-using some of the arrays.
- yt_counters("rms vel computing")
- rms_vel_temp = np.zeros((self.group_count, 2), dtype='float64')
- if calc:
- vel = np.empty((calc, 3), dtype='float64')
- vel[:, 0] = xv[select] * ms
- vel[:, 1] = yv[select] * ms
- vel[:, 2] = zv[select] * ms
- vel = vel[sort]
- for i, u in enumerate(uniq_subchain):
- # This finds the sum locally.
- rms_vel_temp[u][0] = np.sum(((vel[marks[i]:marks[i + 1]] - \
- self.bulk_vel[u]) / self.Tot_M[u]) ** 2.)
- # I could use self.group_sizes...
- rms_vel_temp[u][1] = marks[i + 1] - marks[i]
- del vel, marks, uniq_subchain
- # Bring it together.
- rms_vel_temp = self.comm.mpi_allreduce(rms_vel_temp, op='sum')
- self.rms_vel = np.empty(self.group_count, dtype='float64')
- for groupID in xrange(self.group_count):
- # Here we do the Mean and the Root.
- self.rms_vel[groupID] = \
- np.sqrt(rms_vel_temp[groupID][0] / rms_vel_temp[groupID][1]) * \
- self.group_sizes[groupID]
- del rms_vel_temp
- yt_counters("rms vel computing")
- self.taskID = obj.mine
- self.halo_taskmap = obj.halo_taskmap # A defaultdict.
- del obj
- gc.collect()
- yt_counters("Precomp bulk vel.")
-
- def _parse_output(self):
- yt_counters("Final Grouping")
- """
- Each task will make an entry for all groups, but it may be empty.
- """
- unique_ids = np.unique(self.tags)
- counts = np.bincount((self.tags + 1).tolist())
- sort_indices = np.argsort(self.tags)
- grab_indices = np.indices(self.tags.shape).ravel()[sort_indices]
- del sort_indices
- cp = 0
- index = 0
- # We want arrays for parallel HOP
- self._groups = np.empty(self.group_count, dtype='object')
- self._max_dens = np.empty((self.group_count, 4), dtype='float64')
- if self.group_count == 0:
- mylog.info("There are no halos found.")
- return
- for i in unique_ids:
- if i == -1:
- cp += counts[i + 1]
- continue
- # If there is a gap in the unique_ids, make empty groups to
- # fill it in.
- while index < i:
- self._groups[index] = self._halo_class(self, index, \
- size=self.group_sizes[index], CoM=self.CoM[index], \
- max_dens_point=self.max_dens_point[index], \
- group_total_mass=self.Tot_M[index],
- max_radius=self.max_radius[index],
- bulk_vel=self.bulk_vel[index],
- tasks=self.halo_taskmap[index],
- rms_vel=self.rms_vel[index])
- # I don't own this halo
- self.comm.do_not_claim_object(self._groups[index])
- self._max_dens[index] = [self.max_dens_point[index][0],
- self.max_dens_point[index][1], \
- self.max_dens_point[index][2],
- self.max_dens_point[index][3]]
- index += 1
- cp_c = cp + counts[i + 1]
- group_indices = grab_indices[cp:cp_c]
- self._groups[index] = self._halo_class(self, i, group_indices, \
- size=self.group_sizes[i], CoM=self.CoM[i], \
- max_dens_point=self.max_dens_point[i], \
- group_total_mass=self.Tot_M[i], max_radius=self.max_radius[i],
- bulk_vel=self.bulk_vel[i], tasks=self.halo_taskmap[index],
- rms_vel=self.rms_vel[i])
- # This halo may be owned by many, including this task
- self.comm.claim_object(self._groups[index])
- self._max_dens[index] = [self.max_dens_point[i][0],
- self.max_dens_point[i][1], \
- self.max_dens_point[i][2], self.max_dens_point[i][3]]
- cp += counts[i + 1]
- index += 1
- # If there are missing groups at the end, add them.
- while index < self.group_count:
- self._groups[index] = self._halo_class(self, index, \
- size=self.group_sizes[index], CoM=self.CoM[index], \
- max_dens_point=self.max_dens_point[index], \
- group_total_mass=self.Tot_M[index],
- max_radius=self.max_radius[index],
- bulk_vel=self.bulk_vel[index], tasks=self.halo_taskmap[index],
- rms_vel=self.rms_vel[index])
- self.comm.do_not_claim_object(self._groups[index])
- self._max_dens[index] = [self.max_dens_point[index][0],
- self.max_dens_point[index][1], \
- self.max_dens_point[index][2], self.max_dens_point[index][3]]
- index += 1
- # Clean up
- del self.max_dens_point, self.max_radius, self.bulk_vel
- del self.halo_taskmap, self.tags, self.rms_vel
- del grab_indices, unique_ids, counts
- try:
- del group_indices
- except UnboundLocalError:
- pass
-
- def __len__(self):
- return self.group_count
-
- def write_out(self, filename="parallelHopAnalysis.out", ellipsoid_data=False):
- r"""Write out standard halo information to a text file.
-
- Parameters
- ----------
- filename : String
- The name of the file to write to.
- Default = "parallelHopAnalysis.out".
-
- Examples
- --------
- >>> halos.write_out("parallelHopAnalysis.out")
- """
- HaloList.write_out(self, filename, ellipsoid_data)
-
-
class GenericHaloFinder(HaloList, ParallelAnalysisInterface):
def __init__(self, ds, data_source, dm_only=True, padding=0.0):
ParallelAnalysisInterface.__init__(self)
@@ -2007,334 +1650,6 @@
self.write_particle_lists(basename)
self.write_particle_lists_txt(basename)
-
-class parallelHF(GenericHaloFinder, parallelHOPHaloList):
- r"""Parallel HOP halo finder.
-
- Halos are built by:
- 1. Calculating a density for each particle based on a smoothing kernel.
- 2. Recursively linking particles to other particles from lower density
- particles to higher.
- 3. Geometrically proximate chains are identified and
- 4. merged into final halos following merging rules.
-
- Lower thresholds generally produce more halos, and the largest halos
- become larger. Also, halos become more filamentary and over-connected.
-
- This is very similar to HOP, but it does not produce precisely the
- same halos due to unavoidable numerical differences.
-
- Skory et al. "Parallel HOP: A Scalable Halo Finder for Massive
- Cosmological Data Sets." arXiv (2010) 1001.3411
-
- Parameters
- ----------
- ds : `Dataset`
- The dataset on which halo finding will be conducted.
- threshold : float
- The density threshold used when building halos. Default = 160.0.
- dm_only : bool
- If True, only dark matter particles are used when building halos.
- Default = True.
- resize : bool
- Turns load-balancing on or off. Default = True.
- kdtree : string
- Chooses which kD Tree to use. The Fortran one (kdtree = 'F') is
- faster, but uses more memory. The Cython one (kdtree = 'C') is
- slower but is more memory efficient.
- Default = 'F'
- rearrange : bool
- Turns on faster nearest neighbor searches at the cost of increased
- memory usage.
- This option only applies when using the Fortran tree.
- Default = True.
- fancy_padding : bool
- True calculates padding independently for each face of each
- subvolume. Default = True.
- safety : float
- Due to variances in inter-particle spacing in the volume, the
- padding may need to be increased above the raw calculation.
- This number is multiplied to the calculated padding, and values
- >1 increase the padding. Default = 1.5.
- premerge : bool
- True merges chains in two steps (rather than one with False), which
- can speed up halo finding by 25% or more. However, True can result
- in small (<<1%) variations in the final halo masses when compared
- to False. Default = True.
- sample : float
- The fraction of the full dataset on which load-balancing is
- performed. Default = 0.03.
- total_mass : float
- If HOP is run on the same dataset multiple times, the total mass
- of particles in Msun units in the full volume can be supplied here
- to save time.
- This must correspond to the particles being operated on, meaning
- if stars are included in the halo finding, they must be included
- in this mass as well, and vice versa.
- If halo finding is performed on a subvolume, this still corresponds
- to the mass in the entire volume.
- Default = None, which means the total mass is automatically
- calculated.
- num_particles : integer
- The total number of particles in the volume, in the same fashion
- as `total_mass` is calculated. Specifying this turns off
- fancy_padding.
- Default = None, which means the number of particles is
- automatically calculated.
-
- Examples
- --------
- >>> ds = load("RedshiftOutput0000")
- >>> halos = parallelHF(ds)
- """
- def __init__(self, ds, subvolume=None, threshold=160, dm_only=True, \
- resize=True, rearrange=True,\
- fancy_padding=True, safety=1.5, premerge=True, sample=0.03, \
- total_mass=None, num_particles=None, tree='F'):
- if subvolume is not None:
- ds_LE = np.array(subvolume.left_edge)
- ds_RE = np.array(subvolume.right_edge)
- self._data_source = ds.all_data()
- GenericHaloFinder.__init__(self, ds, self._data_source, dm_only,
- padding=0.0)
- self.padding = 0.0
- self.num_neighbors = 65
- self.safety = safety
- self.sample = sample
- self.tree = tree
- if self.tree != 'F' and self.tree != 'C':
- mylog.error("No kD Tree specified!")
- period = ds.domain_right_edge - ds.domain_left_edge
- topbounds = np.array([[0., 0., 0.], period])
- # Cut up the volume evenly initially, with no padding.
- padded, LE, RE, self._data_source = \
- self.partition_index_3d(ds=self._data_source,
- padding=self.padding)
- # also get the total mass of particles
- yt_counters("Reading Data")
- # Adaptive subregions by bisection. We do not load balance if we are
- # analyzing a subvolume.
- ds_names = ["particle_position_x", "particle_position_y",
- "particle_position_z"]
- if ytcfg.getboolean("yt", "inline") == False and \
- resize and self.comm.size != 1 and subvolume is None:
- random.seed(self.comm.rank)
- cut_list = self.partition_index_3d_bisection_list()
- root_points = self._subsample_points()
- self.bucket_bounds = []
- if self.comm.rank == 0:
- self._recursive_divide(root_points, topbounds, 0, cut_list)
- self.bucket_bounds = \
- self.comm.mpi_bcast(self.bucket_bounds)
- my_bounds = self.bucket_bounds[self.comm.rank]
- LE, RE = my_bounds[0], my_bounds[1]
- self._data_source = self.ds.region([0.] * 3, LE, RE)
- # If this isn't parallel, define the region as an AMRRegionStrict so
- # particle IO works.
- if self.comm.size == 1:
- self._data_source = self.ds.region([0.5] * 3,
- LE, RE)
- # get the average spacing between particles for this region
- # The except is for the serial case where the full box is what we want.
- if num_particles is None:
- data = self._data_source["particle_position_x"]
- try:
- l = self._data_source.right_edge - self._data_source.left_edge
- except AttributeError:
- l = ds.domain_right_edge - ds.domain_left_edge
- vol = l[0] * l[1] * l[2]
- full_vol = vol
- # We will use symmetric padding when a subvolume is being used.
- if not fancy_padding or subvolume is not None or \
- num_particles is not None:
- if num_particles is None:
- num_particles = data.size
- avg_spacing = (float(vol) / num_particles) ** (1. / 3.)
- # padding is a function of inter-particle spacing, this is an
- # approximation, but it's OK with the safety factor
- padding = (self.num_neighbors) ** (1. / 3.) * self.safety * \
- avg_spacing
- self.padding = (np.ones(3, dtype='float64') * padding,
- np.ones(3, dtype='float64') * padding)
- mylog.info('padding %s avg_spacing %f vol %f local_parts %d' % \
- (str(self.padding), avg_spacing, vol, num_particles))
- # Another approach to padding, perhaps more accurate.
- elif fancy_padding and self._distributed:
- LE_padding = np.empty(3, dtype='float64')
- RE_padding = np.empty(3, dtype='float64')
- avg_spacing = (vol / data.size) ** (1. / 3.)
- base_padding = (self.num_neighbors) ** (1. / 3.) * self.safety * \
- avg_spacing
- for dim in xrange(3):
- if ytcfg.getboolean("yt", "inline") == False:
- data = self._data_source[ds_names[dim]]
- else:
- data = self._data_source[ds_names[dim]]
- num_bins = 1000
- width = self._data_source.right_edge[dim] - \
- self._data_source.left_edge[dim]
- area = (self._data_source.right_edge[(dim + 1) % 3] - \
- self._data_source.left_edge[(dim + 1) % 3]) * \
- (self._data_source.right_edge[(dim + 2) % 3] - \
- self._data_source.left_edge[(dim + 2) % 3])
- bin_width = base_padding
- num_bins = int(math.ceil(width / bin_width))
- bins = np.arange(num_bins + 1, dtype='float64') * bin_width + \
- self._data_source.left_edge[dim]
- counts, bins = np.histogram(data, bins)
- # left side.
- start = 0
- count = counts[0]
- while count < self.num_neighbors:
- start += 1
- count += counts[start]
- # Get the avg spacing in just this boundary.
- vol = area * (bins[start + 1] - bins[0])
- avg_spacing = (float(vol) / count) ** (1. / 3.)
- LE_padding[dim] = (self.num_neighbors) ** (1. / 3.) * \
- self.safety * avg_spacing
- # right side.
- start = -1
- count = counts[-1]
- while count < self.num_neighbors:
- start -= 1
- count += counts[start]
- vol = area * (bins[-1] - bins[start - 1])
- avg_spacing = (float(vol) / count) ** (1. / 3.)
- RE_padding[dim] = (self.num_neighbors) ** (1. / 3.) * \
- self.safety * avg_spacing
- self.padding = (LE_padding, RE_padding)
- del bins, counts
- mylog.info('fancy_padding %s avg_spacing %f full_vol %f local_parts %d %s' % \
- (str(self.padding), avg_spacing, full_vol,
- data.size, str(self._data_source)))
- # Now we get the full box mass after we have the final composition of
- # subvolumes.
- if total_mass is None:
- total_mass = self.comm.mpi_allreduce((self._data_source["particle_mass"].in_units('Msun').astype('float64')).sum(),
- op='sum')
- if not self._distributed:
- self.padding = (np.zeros(3, dtype='float64'),
- np.zeros(3, dtype='float64'))
- # If we're using a subvolume, we now re-divide.
- if subvolume is not None:
- self._data_source = ds.region([0.] * 3, ds_LE, ds_RE)
- # Cut up the volume.
- padded, LE, RE, self._data_source = \
- self.partition_index_3d(ds=self._data_source,
- padding=0.)
- self.bounds = (LE, RE)
- (LE_padding, RE_padding) = self.padding
- parallelHOPHaloList.__init__(self, self._data_source, self.padding, \
- self.num_neighbors, self.bounds, total_mass, period, \
- threshold=threshold, dm_only=dm_only, rearrange=rearrange,
- premerge=premerge, tree=self.tree)
- self._join_halolists()
- yt_counters("Final Grouping")
-
- def _subsample_points(self):
- # Read in a random subset of the points in each domain, and then
- # collect them on the root task.
- xp = self._data_source["particle_position_x"]
- n_parts = self.comm.mpi_allreduce(xp.size, op='sum')
- local_parts = xp.size
- random_points = int(self.sample * n_parts)
- # We want to get a representative selection of random particles in
- # each subvolume.
- adjust = float(local_parts) / (float(n_parts) / self.comm.size)
- n_random = int(adjust * float(random_points) / self.comm.size)
- mylog.info("Reading in %d random particles." % n_random)
- # Get unique random particles.
- my_points = np.empty((n_random, 3), dtype='float64')
- uni = np.array(random.sample(xrange(xp.size), n_random))
- uni = uni[uni.argsort()]
- my_points[:, 0] = xp[uni]
- del xp
- self._data_source.clear_data()
- my_points[:, 1] = self._data_source["particle_position_y"][uni]
- self._data_source.clear_data()
- my_points[:, 2] = self._data_source["particle_position_z"][uni]
- self._data_source.clear_data()
- del uni
- # Collect them on the root task.
- mine, sizes = self.comm.mpi_info_dict(n_random)
- if mine == 0:
- tot_random = sum(sizes.values())
- root_points = np.empty((tot_random, 3), dtype='float64')
- root_points.shape = (1, 3 * tot_random)
- else:
- root_points = np.empty([])
- my_points.shape = (1, n_random * 3)
- root_points = self.comm.par_combine_object(my_points[0],
- datatype="array", op="cat")
- del my_points
- if mine == 0:
- root_points.shape = (tot_random, 3)
- return root_points
-
- def _recursive_divide(self, points, bounds, level, cut_list):
- dim = cut_list[level][0]
- parts = points.shape[0]
- num_bins = 1000
- width = bounds[1][dim] - bounds[0][dim]
- bin_width = width / num_bins
- bins = np.arange(num_bins + 1, dtype='float64') * bin_width + \
- bounds[0][dim]
- counts, bins = np.histogram(points[:, dim], bins)
- # Find the bin that passes the cut points.
- midpoints = [bounds[0][dim]]
- sum = 0
- bin = 0
- for step in xrange(1, cut_list[level][1]):
- while sum < ((parts * step) / cut_list[level][1]):
- lastsum = sum
- sum += counts[bin]
- bin += 1
- # Bin edges
- left_edge = bins[bin - 1]
- right_edge = bins[bin]
- # Find a better approx of the midpoint cut
- # line using a linear approx.
- a = float(sum - lastsum) / (right_edge - left_edge)
- midpoints.append(left_edge + (0.5 - \
- (float(lastsum) / parts / 2)) / a)
- midpoints.append(bounds[1][dim])
-
- # Split the points & update the bounds.
- subpoints = []
- subbounds = []
- for pair in zip(midpoints[:-1], midpoints[1:]):
- select = np.bitwise_and(points[:, dim] >= pair[0],
- points[:, dim] < pair[1])
- subpoints.append(points[select])
- nb = bounds.copy()
- nb[0][dim] = pair[0]
- nb[1][dim] = pair[1]
- subbounds.append(nb)
- # If we're at the maxlevel, make a bucket. Otherwise, recurse down.
- maxlevel = len(cut_list) - 1
- for pair in zip(subpoints, subbounds):
- if level == maxlevel:
- self.bucket_bounds.append(pair[1])
- else:
- self._recursive_divide(pair[0], pair[1], level + 1, cut_list)
-
- def _join_halolists(self):
- if self.group_count == 0:
- mylog.info("There are no halos found.")
- return
- ms = -self.Tot_M.copy()
- del self.Tot_M
- Cx = self.CoM[:, 0].copy()
- sorted = np.lexsort([Cx, ms])
- del Cx, ms
- self._groups = self._groups[sorted]
- self._max_dens = self._max_dens[sorted]
- for i in xrange(self.group_count):
- self._groups[i].id = i
- del sorted, self.group_sizes, self.CoM
-
-
class HOPHaloFinder(GenericHaloFinder, HOPHaloList):
r"""HOP halo finder.
This diff is so big that we needed to truncate the remainder.
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.