[Yt-svn] yt: 4 new changesets

hg at spacepope.org
Wed Nov 3 09:38:05 PDT 2010


hg Repository: yt
details:   yt/rev/d6502d2ac18f
changeset: 3496:d6502d2ac18f
user:      Stephen Skory <stephenskory at yahoo.com>
date:      Mon Nov 01 09:34:30 2010 -0600
description:
Removing two unused functions for parallel domain division.

hg Repository: yt
details:   yt/rev/b013f3de0fd1
changeset: 3497:b013f3de0fd1
user:      Stephen Skory <stephenskory at yahoo.com>
date:      Tue Nov 02 16:15:46 2010 -0600
description:
Finding halos in a sub-volume works for HOP in serial.

hg Repository: yt
details:   yt/rev/8402bb185374
changeset: 3498:8402bb185374
user:      Stephen Skory <stephenskory at yahoo.com>
date:      Tue Nov 02 17:53:58 2010 -0600
description:
FOF on subvolumes works, and both HOP and FOF work in parallel, which is
pretty cool.

hg Repository: yt
details:   yt/rev/4fb0d255edbe
changeset: 3499:4fb0d255edbe
user:      Stephen Skory <stephenskory at yahoo.com>
date:      Wed Nov 03 10:37:32 2010 -0600
description:
It looks like halo finding on subvolumes works for parallel HOP now,
in serial and in parallel.
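
Taken together, these changesets add an optional subvolume keyword to the
HOP, FOF, and parallel HOP halo finders. A minimal usage sketch, following
the docstring examples in the diff below (the pf.h.region(center,
left_edge, right_edge) call is assumed from yt 2.x and is not part of this
changeset; any data source with left_edge and right_edge attributes should
do):

    from yt.mods import *

    pf = load("RedshiftOutput0000")
    # Any data source exposing .left_edge and .right_edge can serve as
    # the subvolume; here, the lower-left octant of the domain.
    sv = pf.h.region([0.25, 0.25, 0.25], [0.0, 0.0, 0.0], [0.5, 0.5, 0.5])
    # HOP restricted to that region; FOFHaloFinder and parallelHF take
    # the same keyword, and subvolume=None (the default) is the full box.
    halos = HaloFinder(pf, subvolume=sv, threshold=160.0)

Note that total_mass is still computed over the entire volume, so the
density threshold keeps its usual meaning on the subvolume.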

diffstat:

 yt/analysis_modules/halo_finding/halo_objects.py               |   93 ++-
 yt/analysis_modules/two_point_functions/two_point_functions.py |    8 +-
 yt/utilities/parallel_tools/parallel_analysis_interface.py     |  309 +---------
 3 files changed, 94 insertions(+), 316 deletions(-)

diffs (truncated from 560 to 300 lines):

diff -r 276e74af822b -r 4fb0d255edbe yt/analysis_modules/halo_finding/halo_objects.py
--- a/yt/analysis_modules/halo_finding/halo_objects.py	Fri Oct 29 16:04:40 2010 -0700
+++ b/yt/analysis_modules/halo_finding/halo_objects.py	Wed Nov 03 10:37:32 2010 -0600
@@ -1328,10 +1328,10 @@
         HaloList.write_out(self, filename)
 
 class GenericHaloFinder(HaloList, ParallelAnalysisInterface):
-    def __init__(self, pf, dm_only=True, padding=0.0):
+    def __init__(self, pf, ds, dm_only=True, padding=0.0):
         self.pf = pf
         self.hierarchy = pf.h
-        self.center = (pf.domain_right_edge + pf.domain_left_edge)/2.0
+        self.center = (na.array(ds.right_edge) + na.array(ds.left_edge))/2.0
 
     def _parse_halolist(self, threshold_adjustment):
         groups, max_dens, hi  = [], {}, 0
@@ -1473,7 +1473,8 @@
             halo.write_particle_list(f)
 
 class parallelHF(GenericHaloFinder, parallelHOPHaloList):
-    def __init__(self, pf, threshold=160, dm_only=True, resize=True, rearrange=True,\
+    def __init__(self, pf, subvolume=None,threshold=160, dm_only=True, \
+        resize=True, rearrange=True,\
         fancy_padding=True, safety=1.5, premerge=True, sample=0.03):
         r"""Parallel HOP halo finder.
         
@@ -1528,7 +1529,12 @@
         >>> pf = load("RedshiftOutput0000")
         >>> halos = parallelHF(pf)
         """
-        GenericHaloFinder.__init__(self, pf, dm_only, padding=0.0)
+        if subvolume is not None:
+            ds_LE = na.array(subvolume.left_edge)
+            ds_RE = na.array(subvolume.right_edge)
+        self._data_source = pf.h.all_data()
+        GenericHaloFinder.__init__(self, pf, self._data_source, dm_only,
+            padding=0.0)
         self.padding = 0.0
         self.num_neighbors = 65
         self.safety = safety
@@ -1536,13 +1542,16 @@
         period = pf.domain_right_edge - pf.domain_left_edge
         topbounds = na.array([[0., 0., 0.], period])
         # Cut up the volume evenly initially, with no padding.
-        padded, LE, RE, self._data_source = self._partition_hierarchy_3d(padding=self.padding)
+        padded, LE, RE, self._data_source = \
+            self._partition_hierarchy_3d(ds=self._data_source,
+            padding=self.padding)
         # also get the total mass of particles
         yt_counters("Reading Data")
-        # Adaptive subregions by bisection.
+        # Adaptive subregions by bisection. We do not load balance if we are
+        # analyzing a subvolume.
         ds_names = ["particle_position_x","particle_position_y","particle_position_z"]
         if ytcfg.getboolean("yt","inline") == False and \
-            resize and self._mpi_get_size() != 1:
+            resize and self._mpi_get_size() != 1 and subvolume is None:
             random.seed(self._mpi_get_rank())
             cut_list = self._partition_hierarchy_3d_bisection_list()
             root_points = self._subsample_points()
@@ -1569,7 +1578,8 @@
             l = pf.domain_right_edge - pf.domain_left_edge
         vol = l[0] * l[1] * l[2]
         full_vol = vol
-        if not fancy_padding:
+        # We will use symmetric padding when a subvolume is being used.
+        if not fancy_padding or subvolume is not None:
             avg_spacing = (float(vol) / data.size)**(1./3.)
             # padding is a function of inter-particle spacing, this is an
             # approximation, but it's OK with the safety factor
@@ -1626,6 +1636,13 @@
             total_mass = self._mpi_allsum((self._data_source["ParticleMassMsun"].astype('float64')).sum())
         if not self._distributed:
             self.padding = (na.zeros(3,dtype='float64'), na.zeros(3,dtype='float64'))
+        # If we're using a subvolume, we now re-divide.
+        if subvolume is not None:
+            self._data_source = pf.h.periodic_region_strict([0.]*3, ds_LE, ds_RE)
+            # Cut up the volume.
+            padded, LE, RE, self._data_source = \
+                self._partition_hierarchy_3d(ds=self._data_source,
+                padding=0.)
         self.bounds = (LE, RE)
         (LE_padding, RE_padding) = self.padding
         parallelHOPHaloList.__init__(self, self._data_source, self.padding, \
@@ -1734,7 +1751,8 @@
 
 
 class HOPHaloFinder(GenericHaloFinder, HOPHaloList):
-    def __init__(self, pf, threshold=160, dm_only=True, padding=0.02):
+    def __init__(self, pf, subvolume=None, threshold=160, dm_only=True,
+            padding=0.02):
         r"""HOP halo finder.
         
         Halos are built by:
@@ -1753,6 +1771,9 @@
         Parameters
         ----------
         pf : EnzoStaticOutput object
+        subvolume : A region over which HOP will be run, which can be used
+            to run HOP on a subvolume of the full volume. Default = None,
+            which defaults to the full volume automatically.
         threshold : float
             The density threshold used when building halos. Default = 160.0.
         dm_only : bool
@@ -1769,37 +1790,49 @@
         >>> pf = load("RedshiftOutput0000")
         >>> halos = HaloFinder(pf)
         """
-        GenericHaloFinder.__init__(self, pf, dm_only, padding)
-        
-        # do it once with no padding so the total_mass is correct (no duplicated particles)
+        if subvolume is not None:
+            ds_LE = na.array(subvolume.left_edge)
+            ds_RE = na.array(subvolume.right_edge)
+        self._data_source = pf.h.all_data()
+        GenericHaloFinder.__init__(self, pf, self._data_source, dm_only, padding)
+        # do it once with no padding so the total_mass is correct
+        # (no duplicated particles), and on the entire volume, even if only
+        # a small part is actually going to be used.
         self.padding = 0.0
-        padded, LE, RE, self._data_source = self._partition_hierarchy_3d(padding=self.padding)
+        padded, LE, RE, self._data_source = \
+            self._partition_hierarchy_3d(ds = self._data_source, padding=self.padding)
         # For scaling the threshold, note that it's a passthrough
         if dm_only:
             select = self._get_dm_indices()
-            total_mass = self._mpi_allsum((self._data_source["ParticleMassMsun"][select]).sum())
+            total_mass = \
+                self._mpi_allsum((self._data_source["ParticleMassMsun"][select]).sum(dtype='float64'))
         else:
-            total_mass = self._mpi_allsum(self._data_source["ParticleMassMsun"].sum())
+            total_mass = self._mpi_allsum(self._data_source["ParticleMassMsun"].sum(dtype='float64'))
         # MJT: Note that instead of this, if we are assuming that the particles
         # are all on different processors, we should instead construct an
         # object representing the entire domain and sum it "lazily" with
         # Derived Quantities.
+        if subvolume is not None:
+            self._data_source = pf.h.periodic_region_strict([0.]*3, ds_LE, ds_RE)
         self.padding = padding #* pf["unitary"] # This should be clevererer
-        padded, LE, RE, self._data_source = self._partition_hierarchy_3d(padding=self.padding)
+        padded, LE, RE, self._data_source = \
+            self._partition_hierarchy_3d(ds = self._data_source,
+            padding=self.padding)
         self.bounds = (LE, RE)
         # reflect particles around the periodic boundary
         #self._reposition_particles((LE, RE))
         if dm_only:
             select = self._get_dm_indices()
-            sub_mass = self._data_source["ParticleMassMsun"][select].sum()
+            sub_mass = self._data_source["ParticleMassMsun"][select].sum(dtype='float64')
         else:
-            sub_mass = self._data_source["ParticleMassMsun"].sum()
-        HOPHaloList.__init__(self, self._data_source, threshold*total_mass/sub_mass, dm_only)
+            sub_mass = self._data_source["ParticleMassMsun"].sum(dtype='float64')
+        HOPHaloList.__init__(self, self._data_source,
+            threshold*total_mass/sub_mass, dm_only)
         self._parse_halolist(total_mass/sub_mass)
         self._join_halolists()
 
 class FOFHaloFinder(GenericHaloFinder, FOFHaloList):
-    def __init__(self, pf, link=0.2, dm_only=True, padding=0.02):
+    def __init__(self, pf, subvolume=None, link=0.2, dm_only=True, padding=0.02):
         r"""Friends-of-friends halo finder.
         
         Halos are found by linking together all pairs of particles closer than
@@ -1815,6 +1848,9 @@
         Parameters
         ----------
         pf : EnzoStaticOutput object
+        subvolume : A region over which FOF will be run, which can be used
+            to run FOF on a subvolume of the full volume. Default = None,
+            which defaults to the full volume automatically.
         link : float
             The interparticle distance (compared to the overall average)
             used to build the halos. Default = 0.2.
@@ -1832,19 +1868,30 @@
         >>> pf = load("RedshiftOutput0000")
         >>> halos = FOFHaloFinder(pf)
         """
+        if subvolume is not None:
+            ds_LE = na.array(subvolume.left_edge)
+            ds_RE = na.array(subvolume.right_edge)
         self.pf = pf
         self.hierarchy = pf.h
-        self.center = (pf.domain_right_edge + pf.domain_left_edge)/2.0
+        self._data_source = pf.h.all_data()
+        GenericHaloFinder.__init__(self, pf, self._data_source, dm_only,
+            padding)
         self.padding = 0.0 #* pf["unitary"] # This should be clevererer
         # get the total number of particles across all procs, with no padding
-        padded, LE, RE, self._data_source = self._partition_hierarchy_3d(padding=self.padding)
+        padded, LE, RE, self._data_source = \
+            self._partition_hierarchy_3d(ds=self._data_source,
+            padding=self.padding)
         n_parts = self._mpi_allsum(self._data_source["particle_position_x"].size)
         # get the average spacing between particles
         l = pf.domain_right_edge - pf.domain_left_edge
         vol = l[0] * l[1] * l[2]
         avg_spacing = (float(vol) / n_parts)**(1./3.)
         self.padding = padding
-        padded, LE, RE, self._data_source = self._partition_hierarchy_3d(padding=self.padding)
+        if subvolume is not None:
+            self._data_source = pf.h.periodic_region_strict([0.]*3, ds_LE, ds_RE)
+        padded, LE, RE, self._data_source = \
+            self._partition_hierarchy_3d(ds=self._data_source,
+            padding=self.padding)
         self.bounds = (LE, RE)
         # reflect particles around the periodic boundary
         #self._reposition_particles((LE, RE))
diff -r 276e74af822b -r 4fb0d255edbe yt/analysis_modules/two_point_functions/two_point_functions.py
--- a/yt/analysis_modules/two_point_functions/two_point_functions.py	Fri Oct 29 16:04:40 2010 -0700
+++ b/yt/analysis_modules/two_point_functions/two_point_functions.py	Wed Nov 03 10:37:32 2010 -0600
@@ -143,7 +143,13 @@
         if not left_edge or not right_edge:
             self.left_edge = self.pf.domain_left_edge
             self.right_edge = self.pf.domain_right_edge
-            padded, self.LE, self.RE, self.ds = self._partition_hierarchy_3d(padding=0.,
+            # This ds business below has to do with changes made for halo
+            # finding on subvolumes and serves no purpose here except
+            # compatibility. This is not the best policy, if I'm honest.
+            ds = pf.h.periodic_region_strict([0.]*3, self.left_edge, 
+                self.right_edge)
+            padded, self.LE, self.RE, self.ds = \
+            self._partition_hierarchy_3d(ds = ds, padding=0.,
                 rank_ratio = self.vol_ratio)
         else:
             self.left_edge = left_edge
diff -r 276e74af822b -r 4fb0d255edbe yt/utilities/parallel_tools/parallel_analysis_interface.py
--- a/yt/utilities/parallel_tools/parallel_analysis_interface.py	Fri Oct 29 16:04:40 2010 -0700
+++ b/yt/utilities/parallel_tools/parallel_analysis_interface.py	Wed Nov 03 10:37:32 2010 -0600
@@ -317,10 +317,21 @@
         box = self.hierarchy.inclined_box(norigin, nbox_vectors)
         return True, box, resolution
         
-    def _partition_hierarchy_3d(self, padding=0.0, rank_ratio = 1):
-        LE, RE = self.pf.domain_left_edge.copy(), self.pf.domain_right_edge.copy()
-        if not self._distributed:
-           return False, LE, RE, self.hierarchy.all_data()
+    def _partition_hierarchy_3d(self, ds, padding=0.0, rank_ratio = 1):
+        LE, RE = na.array(ds.left_edge), na.array(ds.right_edge)
+        # We need to establish if we're looking at a subvolume, in which case
+        # we *do* want to pad things.
+        if (LE == self.pf.domain_left_edge).all() and \
+                (RE == self.pf.domain_right_edge).all():
+            subvol = False
+        else:
+            subvol = True
+        if not self._distributed and not subvol:
+           return False, LE, RE, ds
+        if not self._distributed and subvol:
+            return True, LE, RE, \
+            self.hierarchy.periodic_region_strict(self.center,
+                LE-padding, RE+padding)
         elif ytcfg.getboolean("yt", "inline"):
             # At this point, we want to identify the root grid tile to which
             # this processor is assigned.
@@ -347,7 +358,8 @@
 
         if padding > 0:
             return True, \
-                LE, RE, self.hierarchy.periodic_region_strict(self.center, LE-padding, RE+padding)
+                LE, RE, self.hierarchy.periodic_region_strict(self.center,
+                LE-padding, RE+padding)
 
         return False, LE, RE, self.hierarchy.region_strict(self.center, LE, RE)
 
@@ -517,293 +529,6 @@
         return new_group, new_comm, my_LE, my_RE, new_top_bounds, cc,\
             self.hierarchy.region_strict(self.center, my_LE, my_RE)
 
-    def _partition_hierarchy_3d_weighted_1d(self, weight=None, bins=None, padding=0.0, axis=0, min_sep=.1):
-        LE, RE = self.pf.domain_left_edge.copy(), self.pf.domain_right_edge.copy()
-        if not self._distributed:
-           return False, LE, RE, self.hierarchy.grid_collection(self.center, self.hierarchy.grids)
-
-        cc = MPI.Compute_dims(MPI.COMM_WORLD.size, 3)
-        mi = MPI.COMM_WORLD.rank
-        si = MPI.COMM_WORLD.size
-        cx, cy, cz = na.unravel_index(mi, cc)
-
-        gridx = na.mgrid[LE[0]:RE[0]:(cc[0]+1)*1j]
-        gridy = na.mgrid[LE[1]:RE[1]:(cc[1]+1)*1j]
-        gridz = na.mgrid[LE[2]:RE[2]:(cc[2]+1)*1j]
-
-        x = gridx[cx:cx+2]
-        y = gridy[cy:cy+2]
-        z = gridz[cz:cz+2]
-
-        LE = na.array([x[0], y[0], z[0]], dtype='float64')
-        RE = na.array([x[1], y[1], z[1]], dtype='float64')
-
-        # Default to normal if we don't have a weight, or our subdivisions are
-        # not enough to warrant this procedure.
-        if weight is None or cc[axis] < 1:
-            if padding > 0:
-                return True, \
-                    LE, RE, self.hierarchy.periodic_region_strict(self.center, LE-padding, RE+padding)
-
-            return False, LE, RE, self.hierarchy.region_strict(self.center, LE, RE)
-


