[Yt-svn] yt: Changing the manner in which fields are obtained from grids ...

Mon Apr 26 09:57:54 PDT 2010

hg Repository: yt
details:   yt/rev/c7169b1b4c66
changeset: 1611:c7169b1b4c66
user:      Matthew Turk <matthewturk at gmail.com>
date:      Mon Apr 26 09:57:45 2010 -0700
description:
Changing the manner in which fields are obtained from grids in the projections.
Previously, the IO load was substantially higher than it should have been for
serial projections where the weight field also shows up in the list of fields
to project -- the weight field got read twice.  Now this no longer happens.
For some projections we're now just about compute dominated.  Not quite time to
move to a quadtree, but almost!

diffstat:

 yt/lagos/BaseDataTypes.py |  28 ++++++++++++++++++++--------
 1 files changed, 20 insertions(+), 8 deletions(-)

diffs (53 lines):

diff -r 055ececd0756 -r c7169b1b4c66 yt/lagos/BaseDataTypes.py

--- a/yt/lagos/BaseDataTypes.py	Sun Apr 25 22:17:37 2010 -0700
+++ b/yt/lagos/BaseDataTypes.py	Mon Apr 26 09:57:45 2010 -0700
@@ -1416,16 +1416,27 @@
         pass
 
     def _project_grid(self, grid, fields, zero_out):
+        # We split this next bit into two sections to try to limit the IO load
+        # on the system.  This way, we preserve grid state (@restore_grid_state
+        # in _get_data_from_grid) *and* we attempt not to load weight data
+        # independently of the standard field data.
         if self._weight is None:
             weight_data = na.ones(grid.ActiveDimensions, dtype='float64')
+            if zero_out: weight_data[grid.child_indices] = 0
+            masked_data = [fd.astype('float64') * weight_data
+                           for fd in self._get_data_from_grid(grid, fields)]
         else:
-            weight_data = self._get_data_from_grid(grid, self._weight).astype('float64')
-        if zero_out: weight_data[grid.child_indices] = 0
+            fields_to_get = list(set(fields + [self._weight]))
+            field_data = dict(zip(
+                fields_to_get, self._get_data_from_grid(grid, fields_to_get)))
+            weight_data = field_data[self._weight].copy().astype('float64')
+            if zero_out: weight_data[grid.child_indices] = 0
+            masked_data  = [field_data[field].copy().astype('float64') * weight_data
+                                for field in fields]
+            del field_data
         # if we zero it out here, then we only have to zero out the weight!
-        masked_data = [self._get_data_from_grid(grid, field) * weight_data
-                       for field in fields]
-        full_proj = [self.func(field,axis=self.axis) for field in masked_data]
-        weight_proj = self.func(weight_data,axis=self.axis)
+        full_proj = [self.func(field, axis=self.axis) for field in masked_data]
+        weight_proj = self.func(weight_data, axis=self.axis)
         if (self._check_region and not self.source._is_fully_enclosed(grid)) or self._field_cuts is not None:
             used_data = self._get_points_in_region(grid).astype('bool')
             used_points = na.where(na.logical_or.reduce(used_data, self.axis))
@@ -1458,12 +1469,13 @@
         return point_mask
 
     @restore_grid_state
-    def _get_data_from_grid(self, grid, field):
+    def _get_data_from_grid(self, grid, fields):
+        fields = ensure_list(fields)
         if self._check_region:
             bad_points = self._get_points_in_region(grid)
         else:
             bad_points = 1.0
-        return grid[field] * bad_points
+        return [grid[field] * bad_points for field in fields]
 
     def _gen_node_name(self):
         return  "%s/%s" % \