[yt-svn] commit/yt: 4 new changesets

commits-noreply at bitbucket.org
Fri Jun 20 11:08:30 PDT 2014


4 new commits in yt:

https://bitbucket.org/yt_analysis/yt/commits/3a9b4e0e9058/
Changeset:   3a9b4e0e9058
Branch:      yt-3.0
User:        MatthewTurk
Date:        2014-06-19 15:03:24
Summary:     Testing possible fixes for memory usage.
Affected #:  3 files

diff -r 1bf2d3688801615cef5fd75197987eea5193aeb8 -r 3a9b4e0e9058b07c905a0ddc235de3c1a6a33a63 yt/data_objects/grid_patch.py
--- a/yt/data_objects/grid_patch.py
+++ b/yt/data_objects/grid_patch.py
@@ -38,6 +38,7 @@
     _num_ghost_zones = 0
     _grids = None
     _id_offset = 1
+    _cache_mask = True
 
     _type_name = 'grid'
     _skip_add = True
@@ -351,10 +352,12 @@
         yield self, mask
 
     def _get_selector_mask(self, selector):
-        if hash(selector) == self._last_selector_id:
+        if self._cache_mask and hash(selector) == self._last_selector_id:
             mask = self._last_mask
         else:
-            self._last_mask = mask = selector.fill_mask(self)
+            mask = selector.fill_mask(self)
+            if self._cache_mask:
+                self._last_mask = mask
             self._last_selector_id = hash(selector)
             if mask is None:
                 self._last_count = 0

diff -r 1bf2d3688801615cef5fd75197987eea5193aeb8 -r 3a9b4e0e9058b07c905a0ddc235de3c1a6a33a63 yt/frontends/flash/data_structures.py
--- a/yt/frontends/flash/data_structures.py
+++ b/yt/frontends/flash/data_structures.py
@@ -38,6 +38,7 @@
 from .fields import FLASHFieldInfo
 
 class FLASHGrid(AMRGridPatch):
+    _cache_mask = False
     _id_offset = 1
     #__slots__ = ["_level_id", "stop_index"]
     def __init__(self, id, index, level):

diff -r 1bf2d3688801615cef5fd75197987eea5193aeb8 -r 3a9b4e0e9058b07c905a0ddc235de3c1a6a33a63 yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -290,7 +290,7 @@
         count = sum((g.count(dobj.selector) for g in grids))
         return count
 
-    def _chunk_all(self, dobj, cache = True):
+    def _chunk_all(self, dobj, cache = False):
         gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
         yield YTDataChunk(dobj, "all", gobjs, dobj.size, cache)
         
@@ -317,7 +317,7 @@
             # individual grids.
             yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
 
-    def _chunk_io(self, dobj, cache = True, local_only = False):
+    def _chunk_io(self, dobj, cache = False, local_only = False):
         # local_only is only useful for inline datasets and requires
         # implementation by subclasses.
         gfiles = defaultdict(list)
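
For context, a minimal standalone sketch of the caching toggle introduced
above (hypothetical class names, not yt's API; a real selector returns a
boolean NumPy mask rather than a plain list):

    # Sketch: a class-level flag lets subclasses opt out of caching the most
    # recent selector mask, trading repeated mask computation for lower
    # per-grid memory retention.
    class SimpleGrid:
        _cache_mask = True          # subclasses may set this to False
        _last_selector_id = None
        _last_mask = None

        def _get_selector_mask(self, selector):
            if self._cache_mask and hash(selector) == self._last_selector_id:
                mask = self._last_mask
            else:
                mask = selector.fill_mask(self)
                if self._cache_mask:
                    self._last_mask = mask
                self._last_selector_id = hash(selector)
            return mask

    class NonCachingGrid(SimpleGrid):
        # What the FLASH frontend briefly does in this series.
        _cache_mask = False

    class SphereSelector:
        # Hypothetical stand-in for a selector: hashable, with fill_mask().
        def fill_mask(self, grid):
            return [True, False, True]   # stand-in for a boolean mask array

    g, s = SimpleGrid(), SphereSelector()
    assert g._get_selector_mask(s) is g._get_selector_mask(s)        # cached
    ng = NonCachingGrid()
    assert ng._get_selector_mask(s) is not ng._get_selector_mask(s)  # recomputed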


https://bitbucket.org/yt_analysis/yt/commits/4da9da3ca4ce/
Changeset:   4da9da3ca4ce
Branch:      yt-3.0
User:        MatthewTurk
Date:        2014-06-19 15:39:58
Summary:     Undo the parts of the last patch that caused actual speed problems.
Affected #:  2 files

diff -r 3a9b4e0e9058b07c905a0ddc235de3c1a6a33a63 -r 4da9da3ca4cebd3aa6d0a7c785d9d79b9e8f2dd4 yt/frontends/flash/data_structures.py
--- a/yt/frontends/flash/data_structures.py
+++ b/yt/frontends/flash/data_structures.py
@@ -38,7 +38,6 @@
 from .fields import FLASHFieldInfo
 
 class FLASHGrid(AMRGridPatch):
-    _cache_mask = False
     _id_offset = 1
     #__slots__ = ["_level_id", "stop_index"]
     def __init__(self, id, index, level):

diff -r 3a9b4e0e9058b07c905a0ddc235de3c1a6a33a63 -r 4da9da3ca4cebd3aa6d0a7c785d9d79b9e8f2dd4 yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -290,7 +290,7 @@
         count = sum((g.count(dobj.selector) for g in grids))
         return count
 
-    def _chunk_all(self, dobj, cache = False):
+    def _chunk_all(self, dobj, cache = True):
         gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
         yield YTDataChunk(dobj, "all", gobjs, dobj.size, cache)
         
@@ -317,7 +317,7 @@
             # individual grids.
             yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
 
-    def _chunk_io(self, dobj, cache = False, local_only = False):
+    def _chunk_io(self, dobj, cache = True, local_only = False):
         # local_only is only useful for inline datasets and requires
         # implementation by subclasses.
         gfiles = defaultdict(list)
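
A rough, hypothetical illustration of the trade-off being restored here (not
yt's actual YTDataChunk): when a chunk's cache flag is on, data read once is
kept for the life of the chunk, so repeated accesses avoid re-reading from
disk at the cost of holding the arrays in memory.

    # Hypothetical chunk with a cache flag (illustration only): cache=True
    # keeps data after the first read; cache=False re-runs the slow read on
    # every access, which is the speed problem reverted above.
    class DataChunk:
        def __init__(self, read_field, cache=True):
            self._read_field = read_field   # callable: field name -> data
            self._cache = cache
            self._store = {}

        def get(self, field):
            if self._cache and field in self._store:
                return self._store[field]
            data = self._read_field(field)  # potentially expensive disk read
            if self._cache:
                self._store[field] = data
            return data

    reads = []
    def slow_read(field):
        reads.append(field)                 # count "disk" hits
        return [1.0, 2.0, 3.0]

    chunk = DataChunk(slow_read, cache=True)
    chunk.get("density")
    chunk.get("density")
    assert reads.count("density") == 1      # second access served from cache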


https://bitbucket.org/yt_analysis/yt/commits/21e6329e45a7/
Changeset:   21e6329e45a7
Branch:      yt-3.0
User:        MatthewTurk
Date:        2014-06-19 15:52:36
Summary:     Set a relatively small grid chunksize for IO chunking. This will
sub-chunk each file's grid list into smaller IO chunks.
Affected #:  1 file

diff -r 4da9da3ca4cebd3aa6d0a7c785d9d79b9e8f2dd4 -r 21e6329e45a7f9c98cc5eed0738428b02c1f1869 yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -317,6 +317,7 @@
             # individual grids.
             yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
 
+    _grid_chunksize = 1000
     def _chunk_io(self, dobj, cache = True, local_only = False):
         # local_only is only useful for inline datasets and requires
         # implementation by subclasses.
@@ -325,7 +326,14 @@
         for g in gobjs:
             gfiles[g.filename].append(g)
         for fn in sorted(gfiles):
+            # We can apply a heuristic here to make sure we aren't loading too
+            # many grids all at once.
             gs = gfiles[fn]
-            yield YTDataChunk(dobj, "io", gs, self._count_selection(dobj, gs),
-                              cache = cache)
+            size = self._grid_chunksize
+            
+            for grids in (gs[pos:pos + size] for pos
+                          in xrange(0, len(gs), size)):
+                yield YTDataChunk(dobj, "io", grids,
+                        self._count_selection(dobj, grids),
+                        cache = cache)
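
A standalone sketch of the sub-chunking heuristic above (made-up names;
Python 3 range where the diff uses Python 2 xrange):

    # Split each file's grid list into slices of at most _grid_chunksize
    # grids, so a single IO chunk never holds too many grids at once.
    GRID_CHUNKSIZE = 1000    # mirrors the _grid_chunksize class attribute

    def iter_grid_batches(grids, size=GRID_CHUNKSIZE):
        """Yield successive slices of at most `size` grids."""
        for pos in range(0, len(grids), size):
            yield grids[pos:pos + size]

    # e.g. 2500 grids on one file become batches of 1000, 1000 and 500
    batches = list(iter_grid_batches(list(range(2500))))
    assert [len(b) for b in batches] == [1000, 1000, 500]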
 


https://bitbucket.org/yt_analysis/yt/commits/3e627557e0bb/
Changeset:   3e627557e0bb
Branch:      yt-3.0
User:        MatthewTurk
Date:        2014-06-20 20:08:22
Summary:     Merged in MatthewTurk/yt/yt-3.0 (pull request #962)

Reduce memory usage for grid datasets
Affected #:  3 files

diff -r 93f97b13eb7ff1509cc45457a17574c19d91f5fb -r 3e627557e0bb1f730575c4990e6fc0f940e7a147 yt/data_objects/grid_patch.py
--- a/yt/data_objects/grid_patch.py
+++ b/yt/data_objects/grid_patch.py
@@ -38,6 +38,7 @@
     _num_ghost_zones = 0
     _grids = None
     _id_offset = 1
+    _cache_mask = True
 
     _type_name = 'grid'
     _skip_add = True
@@ -351,10 +352,12 @@
         yield self, mask
 
     def _get_selector_mask(self, selector):
-        if hash(selector) == self._last_selector_id:
+        if self._cache_mask and hash(selector) == self._last_selector_id:
             mask = self._last_mask
         else:
-            self._last_mask = mask = selector.fill_mask(self)
+            mask = selector.fill_mask(self)
+            if self._cache_mask:
+                self._last_mask = mask
             self._last_selector_id = hash(selector)
             if mask is None:
                 self._last_count = 0

diff -r 93f97b13eb7ff1509cc45457a17574c19d91f5fb -r 3e627557e0bb1f730575c4990e6fc0f940e7a147 yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -317,6 +317,7 @@
             # individual grids.
             yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
 
+    _grid_chunksize = 1000
     def _chunk_io(self, dobj, cache = True, local_only = False):
         # local_only is only useful for inline datasets and requires
         # implementation by subclasses.
@@ -325,7 +326,14 @@
         for g in gobjs:
             gfiles[g.filename].append(g)
         for fn in sorted(gfiles):
+            # We can apply a heuristic here to make sure we aren't loading too
+            # many grids all at once.
             gs = gfiles[fn]
-            yield YTDataChunk(dobj, "io", gs, self._count_selection(dobj, gs),
-                              cache = cache)
+            size = self._grid_chunksize
+            
+            for grids in (gs[pos:pos + size] for pos
+                          in xrange(0, len(gs), size)):
+                yield YTDataChunk(dobj, "io", grids,
+                        self._count_selection(dobj, grids),
+                        cache = cache)

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving this
email because you have the commit notification service enabled.


