[yt-svn] commit/yt: 16 new changesets

commits-noreply at bitbucket.org
Mon Apr 10 13:41:40 PDT 2017


16 new commits in yt:

https://bitbucket.org/yt_analysis/yt/commits/2de0bef81fe4/
Changeset:   2de0bef81fe4
Branch:      yt
User:        MatthewTurk
Date:        2016-11-16 22:34:08+00:00
Summary:     Experiment with refactoring grid data reading
Affected #:  3 files

diff -r 09f0ef297d7068078a021fc8290d9e3519baf82d -r 2de0bef81fe498a0e4b9c3cfbc1c41b22b3e9d2d yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -137,80 +137,52 @@
                         yield (ptype, field), data[mask]
             if f: f.close()
 
-    def _read_fluid_selection(self, chunks, selector, fields, size):
-        rv = {}
-        # Now we have to do something unpleasant
-        chunks = list(chunks)
-        if selector.__class__.__name__ == "GridSelector":
-            if not (len(chunks) == len(chunks[0].objs) == 1):
-                raise RuntimeError
-            g = chunks[0].objs[0]
-            f = h5py.File(u(g.filename), 'r')
-            if g.id in self._cached_fields:
-                gf = self._cached_fields[g.id]
-                rv.update(gf)
-            if len(rv) == len(fields): return rv
-            gds = f.get("/Grid%08i" % g.id)
-            for field in fields:
-                if field in rv:
-                    self._hits += 1
-                    continue
-                self._misses += 1
-                ftype, fname = field
-                if fname in gds:
-                    rv[(ftype, fname)] = gds.get(fname).value.swapaxes(0, -1)
-                else:
-                    rv[(ftype, fname)] = np.zeros(g.ActiveDimensions)
-            if self._cache_on:
-                for gid in rv:
-                    self._cached_fields.setdefault(gid, {})
-                    self._cached_fields[gid].update(rv[gid])
-            f.close()
-            return rv
-        if size is None:
-            size = sum((g.count(selector) for chunk in chunks
-                        for g in chunk.objs))
-        for field in fields:
-            ftype, fname = field
-            fsize = size
-            rv[field] = np.empty(fsize, dtype="float64")
-        ng = sum(len(c.objs) for c in chunks)
-        mylog.debug("Reading %s cells of %s fields in %s grids",
-                   size, [f2 for f1, f2 in fields], ng)
-        ind = 0
-        h5_type = self._field_dtype
+    def io_iter(self, chunks, fields):
+        h5_dtype = self._field_dtype
         for chunk in chunks:
             fid = None
-            for g in chunk.objs:
-                if g.filename is None: continue
-                if fid is None:
-                    fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
-                gf = self._cached_fields.get(g.id, {})
-                data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
+            filename = -1
+            for obj in chunk.objs:
+                if obj.filename is None: continue
+                if obj.filename != filename:
+                    # Note one really important thing here: even if we do
+                    # implement LRU caching in the _read_chunk_obj function,
+                    # we'll still be doing file opening and whatnot.  This is a
+                    # problem, but one we can return to.
+                    if fid is not None:
+                        fid.close()
+                    fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
+                    filename = obj.filename
+                data = np.empty(obj.ActiveDimensions[::-1], dtype=h5_dtype)
                 data_view = data.swapaxes(0, -1)
-                nd = 0
                 for field in fields:
-                    if field in gf:
-                        nd = g.select(selector, gf[field], rv[field], ind)
-                        self._hits += 1
-                        continue
-                    self._misses += 1
-                    ftype, fname = field
-                    try:
-                        node = "/Grid%08i/%s" % (g.id, fname)
-                        dg = h5py.h5d.open(fid, b(node))
-                    except KeyError:
-                        if fname == "Dark_Matter_Density": continue
-                        raise
-                    dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
-                    if self._cache_on:
-                        self._cached_fields.setdefault(g.id, {})
-                        # Copy because it's a view into an empty temp array
-                        self._cached_fields[g.id][field] = data_view.copy()
-                    nd = g.select(selector, data_view, rv[field], ind) # caches
-                ind += nd
-            if fid: fid.close()
-        return rv
+                    yield chunk, obj, field, (fid, data_view, data)
+        if fid is not None:
+            fid.close()
+        
+    def _read_chunk_obj(self, chunk, obj, field,
+            (fid, data_view, data) = (None, None, None)):
+        if fid is None:
+            close = True
+            fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
+        else:
+            close = False
+        if data_view is None or data is None:
+            data = np.empty(obj.ActiveDimensions[::-1],
+                            dtype=self._field_dtype)
+            data_view = data.swapaxes(0, -1)
+        ftype, fname = field
+        try:
+            node = "/Grid%08i/%s" % (obj.id, fname)
+            dg = h5py.h5d.open(fid, b(node))
+        except KeyError:
+            if fname == "Dark_Matter_Density": return None
+            raise
+        dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
+        dg.close()
+        if close:
+            fid.close()
+        return data_view
 
     @contextmanager
     def preload(self, chunk, fields, max_size):

diff -r 09f0ef297d7068078a021fc8290d9e3519baf82d -r 2de0bef81fe498a0e4b9c3cfbc1c41b22b3e9d2d yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -61,6 +61,19 @@
             count_list, conv_factors):
         pass
 
+    def io_iter(self, chunks, fields):
+        f = self._handle
+        for field in fields:
+            ftype, fname = field
+            ds = f["/%s" % fname]
+            for chunk in chunks:
+                for gs in grid_sequences(chunk.objs):
+                    start = gs[0].id - gs[0]._id_offset
+                    end = gs[-1].id - gs[-1]._id_offset + 1
+                    data = ds[start:end,:,:,:]
+                    for i, g in enumerate(gs):
+                        yield chunk, g, field, (data, i)
+
     def _read_particle_coords(self, chunks, ptf):
         chunks = list(chunks)
         f_part = self._particle_handle
@@ -104,31 +117,17 @@
                     data = p_fields[start:end, fi]
                     yield (ptype, field), data[mask]
 
-    def _read_fluid_selection(self, chunks, selector, fields, size):
-        chunks = list(chunks)
-        if any((ftype != "flash" for ftype, fname in fields)):
-            raise NotImplementedError
-        f = self._handle
-        rv = {}
-        for field in fields:
-            ftype, fname = field
-            # Always use *native* 64-bit float.
-            rv[field] = np.empty(size, dtype="=f8")
-        ng = sum(len(c.objs) for c in chunks)
-        mylog.debug("Reading %s cells of %s fields in %s blocks",
-                    size, [f2 for f1, f2 in fields], ng)
-        for field in fields:
-            ftype, fname = field
-            ds = f["/%s" % fname]
-            ind = 0
-            for chunk in chunks:
-                for gs in grid_sequences(chunk.objs):
-                    start = gs[0].id - gs[0]._id_offset
-                    end = gs[-1].id - gs[-1]._id_offset + 1
-                    data = ds[start:end,:,:,:].transpose()
-                    for i, g in enumerate(gs):
-                        ind += g.select(selector, data[...,i], rv[field], ind)
-        return rv
+    def _read_chunk_obj(self, chunk, obj, field, (ds, offset) = (None, -1)):
+        # our context here includes datasets and whatnot that are opened in the
+        # hdf5 file
+        ds, offset = ctx
+        if ds is None:
+            ds = self._handle["/%s" % field[1]]
+        if offset == -1:
+            data = ds[obj.id - obj._id_offset, :,:,:].transpose()
+        else:
+            data = ds[offset, :,:,:].transpose()
+        return data
 
     def _read_chunk_data(self, chunk, fields):
         f = self._handle

diff -r 09f0ef297d7068078a021fc8290d9e3519baf82d -r 2de0bef81fe498a0e4b9c3cfbc1c41b22b3e9d2d yt/utilities/io_handler.py
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -108,6 +108,20 @@
     def _read_data(self, grid, field):
         pass
 
+    def _read_fluid_selection(self, chunks, selector, fields, size):
+        # This function has an interesting history.  It previously was mandate
+        # to be defined by all of the subclasses.  But, to avoid having to
+        # rewrite a whole bunch of IO handlers all at once, and to allow a
+        # better abstraction for grid-based frontends, we're now defining it in
+        # the base class.
+        rv = {field: np.empty(size, dtype="=f8") for field in fields} 
+        ind = {field: 0 for field in fields}
+        for chunk, obj, field, ctx in self.io_iter(chunks, fields):
+            d = self._read_chunk_obj(chunk, obj, field, ctx)
+            if d is None: continue
+            ind[field] += obj.select(selector, d, rv[field], ind[field])
+        return rv
+
     def _read_data_slice(self, grid, field, axis, coord):
         sl = [slice(None), slice(None), slice(None)]
         sl[axis] = slice(coord, coord + 1)
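
For illustration, here is a minimal, runnable sketch (not part of the commit) of the iterator/reader contract this changeset introduces: a frontend supplies io_iter() and _read_chunk_obj(), and the base class's _read_fluid_selection() drives them, filling the preallocated output arrays through obj.select(). FakeGrid, FakeChunk, and SketchIOHandler below are hypothetical stand-ins for yt's grid, chunk, and handler objects, assuming a selector that keeps every cell.

    import numpy as np

    class FakeGrid:
        # Hypothetical stand-in for a yt grid object.
        def __init__(self, gid, dims):
            self.id = gid
            self.ActiveDimensions = np.array(dims)

        def count(self, selector):
            return int(np.prod(self.ActiveDimensions))

        def select(self, selector, source, dest, offset):
            # A real grid applies the selector mask; here we keep every cell.
            n = source.size
            dest[offset:offset + n] = source.ravel()
            return n

    class FakeChunk:
        def __init__(self, objs):
            self.objs = objs

    class SketchIOHandler:
        # Mirrors the driver added to BaseIOHandler in this changeset.
        def io_iter(self, chunks, fields):
            # Frontend-specific: yield one (chunk, obj, field, ctx) per read.
            for chunk in chunks:
                for obj in chunk.objs:
                    for field in fields:
                        yield chunk, obj, field, None

        def _read_chunk_obj(self, chunk, obj, field, ctx):
            # Frontend-specific: return the data for one (grid, field) pair.
            return np.full(obj.ActiveDimensions, float(obj.id))

        def _read_fluid_selection(self, chunks, selector, fields, size):
            rv = {field: np.empty(size, dtype="=f8") for field in fields}
            ind = {field: 0 for field in fields}
            for chunk, obj, field, ctx in self.io_iter(chunks, fields):
                d = self._read_chunk_obj(chunk, obj, field, ctx)
                if d is None: continue
                ind[field] += obj.select(selector, d, rv[field], ind[field])
            return rv

    grids = [FakeGrid(1, (2, 2, 2)), FakeGrid(2, (2, 2, 2))]
    chunks = [FakeChunk(grids)]
    fields = [("enzo", "Density")]
    size = sum(g.count(None) for g in grids)
    out = SketchIOHandler()._read_fluid_selection(chunks, None, fields, size)
    print(out[("enzo", "Density")])   # eight 1.0s followed by eight 2.0s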


https://bitbucket.org/yt_analysis/yt/commits/73f64b85e8d9/
Changeset:   73f64b85e8d9
Branch:      yt
User:        MatthewTurk
Date:        2016-11-16 23:24:05+00:00
Summary:     Refactoring a bit
Affected #:  3 files

diff -r 2de0bef81fe498a0e4b9c3cfbc1c41b22b3e9d2d -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -146,7 +146,7 @@
                 if obj.filename is None: continue
                 if obj.filename != filename:
                     # Note one really important thing here: even if we do
-                    # implement LRU caching in the _read_chunk_obj function,
+                    # implement LRU caching in the _read_obj_field function,
                     # we'll still be doing file opening and whatnot.  This is a
                     # problem, but one we can return to.
                     if fid is not None:
@@ -154,23 +154,20 @@
                     fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
                     filename = obj.filename
                 data = np.empty(obj.ActiveDimensions[::-1], dtype=h5_dtype)
-                data_view = data.swapaxes(0, -1)
                 for field in fields:
-                    yield chunk, obj, field, (fid, data_view, data)
+                    yield chunk, obj, field, (fid, data)
         if fid is not None:
             fid.close()
         
-    def _read_chunk_obj(self, chunk, obj, field,
-            (fid, data_view, data) = (None, None, None)):
+    def _read_obj_field(self, obj, field, (fid, data) = (None, None, None)):
         if fid is None:
             close = True
             fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
         else:
             close = False
-        if data_view is None or data is None:
+        if data is None:
             data = np.empty(obj.ActiveDimensions[::-1],
                             dtype=self._field_dtype)
-            data_view = data.swapaxes(0, -1)
         ftype, fname = field
         try:
             node = "/Grid%08i/%s" % (obj.id, fname)
@@ -182,7 +179,7 @@
         dg.close()
         if close:
             fid.close()
-        return data_view
+        return data.T
 
     @contextmanager
     def preload(self, chunk, fields, max_size):

diff -r 2de0bef81fe498a0e4b9c3cfbc1c41b22b3e9d2d -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -117,10 +117,9 @@
                     data = p_fields[start:end, fi]
                     yield (ptype, field), data[mask]
 
-    def _read_chunk_obj(self, chunk, obj, field, (ds, offset) = (None, -1)):
+    def _read_obj_field(self, obj, field, (ds, offset) = (None, -1)):
         # our context here includes datasets and whatnot that are opened in the
         # hdf5 file
-        ds, offset = ctx
         if ds is None:
             ds = self._handle["/%s" % field[1]]
         if offset == -1:

diff -r 2de0bef81fe498a0e4b9c3cfbc1c41b22b3e9d2d -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b yt/utilities/io_handler.py
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -117,7 +117,7 @@
         rv = {field: np.empty(size, dtype="=f8") for field in fields} 
         ind = {field: 0 for field in fields}
         for chunk, obj, field, ctx in self.io_iter(chunks, fields):
-            d = self._read_chunk_obj(chunk, obj, field, ctx)
+            d = self._read_obj_field(obj, field, ctx)
             if d is None: continue
             ind[field] += obj.select(selector, d, rv[field], ind[field])
         return rv
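
The removal of data_view above relies on NumPy transposes being views, not copies: for a 3-D array, data.T is the same axis reordering as data.swapaxes(0, -1) and costs nothing to construct, so the reader can simply return data.T instead of carrying a separate swapped view around. A small check of that assumption:

    import numpy as np

    data = np.empty((4, 3, 2))          # ActiveDimensions reversed, as in the reader
    view = data.swapaxes(0, -1)
    print(view.shape, data.T.shape)     # (2, 3, 4) (2, 3, 4) -- identical for 3-D arrays
    print(data.T.base is data)          # True: .T is a view, no data is copied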


https://bitbucket.org/yt_analysis/yt/commits/f2c9dda7f689/
Changeset:   f2c9dda7f689
Branch:      yt
User:        MatthewTurk
Date:        2016-11-17 17:27:16+00:00
Summary:     Refactor and add LRU cache
Affected #:  4 files

diff -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b -r f2c9dda7f689bfb7921133c24c5049c187a34a64 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -155,7 +155,7 @@
                     filename = obj.filename
                 data = np.empty(obj.ActiveDimensions[::-1], dtype=h5_dtype)
                 for field in fields:
-                    yield chunk, obj, field, (fid, data)
+                    yield field, obj, self._read_obj_field(obj, field, (fid, data))
         if fid is not None:
             fid.close()
         

diff -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b -r f2c9dda7f689bfb7921133c24c5049c187a34a64 yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -72,7 +72,7 @@
                     end = gs[-1].id - gs[-1]._id_offset + 1
                     data = ds[start:end,:,:,:]
                     for i, g in enumerate(gs):
-                        yield chunk, g, field, (data, i)
+                        yield field, g, self._read_obj_field(g, field, (data, i))
 
     def _read_particle_coords(self, chunks, ptf):
         chunks = list(chunks)

diff -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b -r f2c9dda7f689bfb7921133c24c5049c187a34a64 yt/utilities/io_handler.py
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -20,16 +20,28 @@
 from yt.utilities.on_demand_imports import _h5py as h5py
 import numpy as np
 from yt.extern.six import add_metaclass
+from yt.utilities.lru_cache import \
+    local_lru_cache, _HashedSeq, _make_key
 
 _axis_ids = {0:2,1:1,2:0}
 
 io_registry = {}
 
+use_caching = 0
+
+def _make_io_key( args, *_args, **kwargs):
+    self, obj, field, ctx = args
+    # Ignore self because we have a self-specific cache
+    return _make_key((obj.id, field), *_args, **kwargs)
+
 class RegisteredIOHandler(type):
     def __init__(cls, name, b, d):
         type.__init__(cls, name, b, d)
         if hasattr(cls, "_dataset_type"):
             io_registry[cls._dataset_type] = cls
+        if use_caching and hasattr(cls, "_read_obj_field"):
+            cls._read_obj_field = local_lru_cache(maxsize=use_caching, 
+                    typed=True, make_key=_make_io_key)(cls._read_obj_field)
 
 @add_metaclass(RegisteredIOHandler)
 class BaseIOHandler(object):
@@ -54,7 +66,6 @@
 
     # We need a function for reading a list of sets
     # and a function for *popping* from a queue all the appropriate sets
-
     @contextmanager
     def preload(self, chunk, fields, max_size):
         yield self
@@ -87,7 +98,7 @@
             return return_val
         else:
             return False
-            
+
     def _read_data_set(self, grid, field):
         # check backup file first. if field not found,
         # call frontend-specific io method
@@ -116,10 +127,9 @@
         # the base class.
         rv = {field: np.empty(size, dtype="=f8") for field in fields} 
         ind = {field: 0 for field in fields}
-        for chunk, obj, field, ctx in self.io_iter(chunks, fields):
-            d = self._read_obj_field(obj, field, ctx)
-            if d is None: continue
-            ind[field] += obj.select(selector, d, rv[field], ind[field])
+        for field, obj, data in self.io_iter(chunks, fields):
+            if data is None: continue
+            ind[field] += obj.select(selector, data, rv[field], ind[field])
         return rv
 
     def _read_data_slice(self, grid, field, axis, coord):

diff -r 73f64b85e8d9f9047e5e145af8f20f15a8d6572b -r f2c9dda7f689bfb7921133c24c5049c187a34a64 yt/utilities/lru_cache.py
--- a/yt/utilities/lru_cache.py
+++ b/yt/utilities/lru_cache.py
@@ -53,7 +53,7 @@
         return key[0]
     return _HashedSeq(key)
 
-def lru_cache(maxsize=100, typed=False):
+def lru_cache(maxsize=100, typed=False, make_key = _make_key):
     """Least-recently-used cache decorator.
     If *maxsize* is set to None, the LRU features are disabled and the cache
     can grow without bound.
@@ -77,7 +77,6 @@
         cache = dict()
         stats = [0, 0]                  # make statistics updateable non-locally
         HITS, MISSES = 0, 1             # names for the stats fields
-        make_key = _make_key
         cache_get = cache.get           # bound method to lookup key or return None
         _len = len                      # localize the global len() function
         lock = RLock()                  # because linkedlist updates aren't threadsafe
@@ -182,6 +181,8 @@
     return decorating_function
 ### End of backported lru_cache
 
+local_lru_cache = lru_cache
+
 if sys.version_info[:2] >= (3, 3):
     # 3.2 has an lru_cache with an incompatible API
     from functools import lru_cache
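
As a rough sketch of what the use_caching hook buys (grid_field_cache, CachingReader, and Obj below are hypothetical stand-ins, not the real machinery): the decorator keys the cache on (obj.id, field) only, mirroring _make_io_key, which ignores self and the per-chunk context, so a repeated read of the same grid/field pair is served from memory. The eviction here is FIFO rather than true LRU, so it only approximates local_lru_cache.

    import functools

    def grid_field_cache(maxsize=64):
        # Simplified analogue of local_lru_cache(make_key=_make_io_key):
        # cache keyed on (obj.id, field), ignoring self and the ctx argument.
        def decorator(func):
            cache = {}
            order = []
            @functools.wraps(func)
            def wrapper(self, obj, field, ctx=None):
                key = (obj.id, field)
                if key in cache:
                    return cache[key]
                value = func(self, obj, field, ctx)
                cache[key] = value
                order.append(key)
                if len(order) > maxsize:
                    cache.pop(order.pop(0), None)   # evict oldest (FIFO, not true LRU)
                return value
            return wrapper
        return decorator

    class Obj:
        def __init__(self, gid):
            self.id = gid

    class CachingReader:
        @grid_field_cache(maxsize=4)
        def _read_obj_field(self, obj, field, ctx=None):
            print("reading grid", obj.id, "field", field)
            return obj.id

    r = CachingReader()
    g = Obj(5)
    r._read_obj_field(g, ("flash", "dens"))   # performs the read
    r._read_obj_field(g, ("flash", "dens"))   # hit: no "reading" line printed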


https://bitbucket.org/yt_analysis/yt/commits/cbe2a906c168/
Changeset:   cbe2a906c168
Branch:      yt
User:        MatthewTurk
Date:        2016-11-17 19:19:20+00:00
Summary:     Remove call to dg.close()
Affected #:  1 file

diff -r f2c9dda7f689bfb7921133c24c5049c187a34a64 -r cbe2a906c1689acd83dd726408b7e5d5a438e1b5 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -176,7 +176,9 @@
             if fname == "Dark_Matter_Density": return None
             raise
         dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
-        dg.close()
+        # I don't know why, but on some installations of h5py this works, but
+        # on others, nope.  Doesn't seem to be a version thing.
+        #dg.close()
         if close:
             fid.close()
         return data.T


https://bitbucket.org/yt_analysis/yt/commits/a8939b5d5360/
Changeset:   a8939b5d5360
Branch:      yt
User:        MatthewTurk
Date:        2016-11-17 19:20:32+00:00
Summary:     Fixing flake8
Affected #:  3 files

diff -r cbe2a906c1689acd83dd726408b7e5d5a438e1b5 -r a8939b5d53605139836acee82c6ba973f8fdc2f2 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -20,7 +20,7 @@
     BaseIOHandler
 from yt.utilities.logger import ytLogger as mylog
 from yt.geometry.selection_routines import AlwaysSelector
-from yt.extern.six import u, b, iteritems
+from yt.extern.six import b, iteritems
 from yt.utilities.on_demand_imports import _h5py as h5py
 
 import numpy as np

diff -r cbe2a906c1689acd83dd726408b7e5d5a438e1b5 -r a8939b5d53605139836acee82c6ba973f8fdc2f2 yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -18,7 +18,6 @@
 
 from yt.utilities.io_handler import \
     BaseIOHandler
-from yt.utilities.logger import ytLogger as mylog
 from yt.geometry.selection_routines import AlwaysSelector
 from yt.utilities.lib.geometry_utils import \
     compute_morton

diff -r cbe2a906c1689acd83dd726408b7e5d5a438e1b5 -r a8939b5d53605139836acee82c6ba973f8fdc2f2 yt/utilities/io_handler.py
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -21,7 +21,7 @@
 import numpy as np
 from yt.extern.six import add_metaclass
 from yt.utilities.lru_cache import \
-    local_lru_cache, _HashedSeq, _make_key
+    local_lru_cache, _make_key
 
 _axis_ids = {0:2,1:1,2:0}
 


https://bitbucket.org/yt_analysis/yt/commits/72e5ba72c421/
Changeset:   72e5ba72c421
Branch:      yt
User:        MatthewTurk
Date:        2016-12-28 17:39:04+00:00
Summary:     Switching order of loops for chunk and fields in FLASH.

This is an interesting one.  Our chunk loop has to be the outermost loop,
because the chunk iterable can be exhausted, whereas the field list cannot.
Since all of the work here is done via iteration, we need to make sure we
don't run out of chunks before we run out of fields.
Affected #:  1 file

diff -r a8939b5d53605139836acee82c6ba973f8fdc2f2 -r 72e5ba72c421adc3be1e4548adb96dbf2529b4ff yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -62,10 +62,13 @@
 
     def io_iter(self, chunks, fields):
         f = self._handle
-        for field in fields:
-            ftype, fname = field
-            ds = f["/%s" % fname]
-            for chunk in chunks:
+        for chunk in chunks:
+            for field in fields:
+                # Note that we *prefer* to iterate over the fields on the
+                # outside; here, though, we're iterating over them on the
+                # inside because we may exhaust our chunks.
+                ftype, fname = field
+                ds = f["/%s" % fname]
                 for gs in grid_sequences(chunk.objs):
                     start = gs[0].id - gs[0]._id_offset
                     end = gs[-1].id - gs[-1]._id_offset + 1
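
A quick illustration of why the loop order matters (plain generators, not yt objects): if chunks is a lazy generator it can only be walked once, so with the field loop on the outside every field after the first would see an empty chunk iterator.

    def make_chunks():
        # Chunks are often produced lazily; a generator can be consumed only once.
        for c in ("chunk0", "chunk1"):
            yield c

    fields = ["dens", "temp"]

    chunks = make_chunks()
    for field in fields:                 # fields outermost: second pass gets nothing
        print(field, list(chunks))       # dens ['chunk0', 'chunk1'], then temp []

    chunks = make_chunks()
    for chunk in chunks:                 # chunks outermost: every pair visited once
        for field in fields:
            print(chunk, field)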


https://bitbucket.org/yt_analysis/yt/commits/782b08b33cf5/
Changeset:   782b08b33cf5
Branch:      yt
User:        MatthewTurk
Date:        2016-12-29 16:38:08+00:00
Summary:     Fixing keyword argument and default value length.
Affected #:  1 file

diff -r 72e5ba72c421adc3be1e4548adb96dbf2529b4ff -r 782b08b33cf500d656890a4454e2dfd7f8650a5e yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -159,7 +159,7 @@
         if fid is not None:
             fid.close()
         
-    def _read_obj_field(self, obj, field, (fid, data) = (None, None, None)):
+    def _read_obj_field(self, obj, field, (fid, data) = (None, None)):
         if fid is None:
             close = True
             fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)


https://bitbucket.org/yt_analysis/yt/commits/258f097c5139/
Changeset:   258f097c5139
Branch:      yt
User:        MatthewTurk
Date:        2017-01-23 21:49:06+00:00
Summary:     Fixing Python3 syntax errors
Affected #:  2 files

diff -r 782b08b33cf500d656890a4454e2dfd7f8650a5e -r 258f097c5139cf94d8a473b9f9bec54150ace7f2 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -159,7 +159,9 @@
         if fid is not None:
             fid.close()
         
-    def _read_obj_field(self, obj, field, (fid, data) = (None, None)):
+    def _read_obj_field(self, obj, field, fid_data = None):
+        if fid_data is None: fid_data = (None, None)
+        fid, data = fid_data
         if fid is None:
             close = True
             fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)

diff -r 782b08b33cf500d656890a4454e2dfd7f8650a5e -r 258f097c5139cf94d8a473b9f9bec54150ace7f2 yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -119,7 +119,9 @@
                     data = p_fields[start:end, fi]
                     yield (ptype, field), data[mask]
 
-    def _read_obj_field(self, obj, field, (ds, offset) = (None, -1)):
+    def _read_obj_field(self, obj, field, ds_offset = None):
+        if ds_offset is None: ds_offset = (None, -1)
+        ds, offset = ds_offset
         # our context here includes datasets and whatnot that are opened in the
         # hdf5 file
         if ds is None:
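
For context, this rewrite is needed because tuple unpacking in function signatures was removed in Python 3 (PEP 3113), so the paired defaults have to be passed as a single optional argument and unpacked in the body. A minimal before/after sketch (self dropped for brevity; the call values are made up):

    # Python 2 only -- a SyntaxError on Python 3:
    #     def _read_obj_field(obj, field, (fid, data)=(None, None)):
    #         ...

    # Python 2/3 compatible form, as in the commit above:
    def _read_obj_field(obj, field, fid_data=None):
        if fid_data is None:
            fid_data = (None, None)
        fid, data = fid_data
        return fid, data

    print(_read_obj_field("grid0", "dens", ("fid", [1.0])))   # ('fid', [1.0])
    print(_read_obj_field("grid0", "dens"))                   # (None, None)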


https://bitbucket.org/yt_analysis/yt/commits/71517fb0e59e/
Changeset:   71517fb0e59e
Branch:      yt
User:        MatthewTurk
Date:        2017-01-23 21:49:25+00:00
Summary:     Unifying particle IO in enzo
Affected #:  1 file

diff -r 258f097c5139cf94d8a473b9f9bec54150ace7f2 -r 71517fb0e59e4992ebf78689579896e428123ca3 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -77,33 +77,10 @@
         return (KeyError,)
 
     def _read_particle_coords(self, chunks, ptf):
-        chunks = list(chunks)
-        for chunk in chunks: # These should be organized by grid filename
-            f = None
-            for g in chunk.objs:
-                if g.filename is None: continue
-                if f is None:
-                    #print "Opening (count) %s" % g.filename
-                    f = h5py.File(g.filename, "r")
-                nap = sum(g.NumberOfActiveParticles.values())
-                if g.NumberOfParticles == 0 and nap == 0:
-                    continue
-                ds = f.get("/Grid%08i" % g.id)
-                for ptype, field_list in sorted(ptf.items()):
-                    if ptype != "io":
-                        if g.NumberOfActiveParticles[ptype] == 0: continue
-                        pds = ds.get("Particles/%s" % ptype)
-                    else:
-                        pds = ds
-                    pn = _particle_position_names.get(ptype,
-                            r"particle_position_%s")
-                    x, y, z = (np.asarray(pds.get(pn % ax).value, dtype="=f8")
-                               for ax in 'xyz')
-                    for field in field_list:
-                        if np.asarray(pds[field]).ndim > 1:
-                            self._array_fields[field] = pds[field].shape
-                    yield ptype, (x, y, z)
-            if f: f.close()
+        # This is fun!  We can get rid of lots of duplicated code just by
+        # adding a conditional into _read_particle_fields and calling that.
+        for rv in self._read_particle_fields(chunks, ptf, None):
+            yield rv
 
     def _read_particle_fields(self, chunks, ptf, selector):
         chunks = list(chunks)
@@ -128,6 +105,11 @@
                             r"particle_position_%s")
                     x, y, z = (np.asarray(pds.get(pn % ax).value, dtype="=f8")
                                for ax in 'xyz')
+                    if selector is None:
+                        # This only ever happens if the call is made from
+                        # _read_particle_coords.
+                        yield ptype, (x, y, z)
+                        continue
                     mask = selector.select_points(x, y, z, 0.0)
                     if mask is None: continue
                     for field in field_list:
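
A small sketch of the deduplication pattern (SketchParticleReader and its data are hypothetical, not yt code): _read_particle_coords becomes a thin wrapper that calls _read_particle_fields with selector=None, and the field reader short-circuits to yield just the positions when no selector is given.

    import numpy as np

    class SketchParticleReader:
        # Hypothetical particle data: ptype -> (x, y, z, {field: values})
        data = {"io": (np.array([0.1, 0.5]), np.array([0.2, 0.6]),
                       np.array([0.3, 0.7]), {"mass": np.array([1.0, 2.0])})}

        def _read_particle_coords(self, chunks, ptf):
            # Same pattern as the commit: reuse the field reader with no selector.
            for rv in self._read_particle_fields(chunks, ptf, None):
                yield rv

        def _read_particle_fields(self, chunks, ptf, selector):
            for ptype, field_list in sorted(ptf.items()):
                x, y, z, values = self.data[ptype]
                if selector is None:
                    # Only happens when called from _read_particle_coords.
                    yield ptype, (x, y, z)
                    continue
                mask = selector(x, y, z)
                for field in field_list:
                    yield (ptype, field), values[field][mask]

    reader = SketchParticleReader()
    ptf = {"io": ["mass"]}
    print(list(reader._read_particle_coords(None, ptf)))
    print(list(reader._read_particle_fields(None, ptf, lambda x, y, z: x < 0.3)))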


https://bitbucket.org/yt_analysis/yt/commits/401c41414a67/
Changeset:   401c41414a67
Branch:      yt
User:        MatthewTurk
Date:        2017-01-23 21:57:19+00:00
Summary:     Removing preload for enzo
Affected #:  1 file

diff -r 71517fb0e59e4992ebf78689579896e428123ca3 -r 401c41414a67f817a7dae4091d4d45ce3ebda7a4 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -167,94 +167,6 @@
             fid.close()
         return data.T
 
-    @contextmanager
-    def preload(self, chunk, fields, max_size):
-        if len(fields) == 0:
-            yield self
-            return
-        old_cache_on = self._cache_on
-        old_cached_fields = self._cached_fields
-        self._cached_fields = cf = {}
-        self._cache_on = True
-        for gid in old_cached_fields:
-            # Will not copy numpy arrays, which is good!
-            cf[gid] = old_cached_fields[gid].copy() 
-        self._hits = self._misses = 0
-        self._cached_fields = self._read_chunk_data(chunk, fields)
-        mylog.debug("(1st) Hits = % 10i Misses = % 10i",
-            self._hits, self._misses)
-        self._hits = self._misses = 0
-        yield self
-        mylog.debug("(2nd) Hits = % 10i Misses = % 10i",
-            self._hits, self._misses)
-        self._cached_fields = old_cached_fields
-        self._cache_on = old_cache_on
-        # Randomly remove some grids from the cache.  Note that we're doing
-        # this on a grid basis, not a field basis.  Performance will be
-        # slightly non-deterministic as a result of this, but it should roughly
-        # be statistically alright, assuming (as we do) that this will get
-        # called during largely unbalanced stuff.
-        if len(self._cached_fields) > max_size:
-            to_remove = random.sample(self._cached_fields.keys(),
-                len(self._cached_fields) - max_size)
-            mylog.debug("Purging from cache %s", len(to_remove))
-            for k in to_remove:
-                self._cached_fields.pop(k)
-        else:
-            mylog.warning("Cache size % 10i (max % 10i)",
-                len(self._cached_fields), max_size)
-
-    def _read_chunk_data(self, chunk, fields):
-        fid = fn = None
-        rv = {}
-        mylog.debug("Preloading fields %s", fields)
-        # Split into particles and non-particles
-        fluid_fields, particle_fields = [], []
-        for ftype, fname in fields:
-            if ftype in self.ds.particle_types:
-                particle_fields.append((ftype, fname))
-            else:
-                fluid_fields.append((ftype, fname))
-        if len(particle_fields) > 0:
-            selector = AlwaysSelector(self.ds)
-            rv.update(self._read_particle_selection(
-              [chunk], selector, particle_fields))
-        if len(fluid_fields) == 0: return rv
-        h5_type = self._field_dtype
-        for g in chunk.objs:
-            rv[g.id] = gf = {}
-            if g.id in self._cached_fields:
-                rv[g.id].update(self._cached_fields[g.id])
-            if g.filename is None: continue
-            elif g.filename != fn:
-                if fid is not None: fid.close()
-                fid = None
-            if fid is None:
-                fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
-                fn = g.filename
-            data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
-            data_view = data.swapaxes(0, -1)
-            for field in fluid_fields:
-                if field in gf:
-                    self._hits += 1
-                    continue
-                self._misses += 1
-                ftype, fname = field
-                try:
-                    node = "/Grid%08i/%s" % (g.id, fname)
-                    dg = h5py.h5d.open(fid, b(node))
-                except KeyError:
-                    if fname == "Dark_Matter_Density": continue
-                    raise
-                dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
-                gf[field] = data_view.copy()
-        if fid: fid.close()
-        if self._cache_on:
-            for gid in rv:
-                self._cached_fields.setdefault(gid, {})
-                self._cached_fields[gid].update(rv[gid])
-        return rv
-
 class IOHandlerPackedHDF5GhostZones(IOHandlerPackedHDF5):
     _dataset_type = "enzo_packed_3d_gz"
 


https://bitbucket.org/yt_analysis/yt/commits/27683e354692/
Changeset:   27683e354692
Branch:      yt
User:        MatthewTurk
Date:        2017-01-23 21:58:10+00:00
Summary:     Simplifying the enzo ghost zone handling
Affected #:  1 file

diff -r 401c41414a67f817a7dae4091d4d45ce3ebda7a4 -r 27683e3546923c460f9d2ebcbe796c63736ec92b yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -177,11 +177,9 @@
                       slice(NGZ, -NGZ),
                       slice(NGZ, -NGZ))
 
-    def _read_raw_data_set(self, grid, field):
-        f = h5py.File(grid.filename, "r")
-        ds = f["/Grid%08i/%s" % (grid.id, field)][:].swapaxes(0,2)
-        f.close()
-        return ds
+    def _read_obj_field(self, *args, **kwargs):
+        return super(IOHandlerPackedHDF5GhostZones, self)._read_obj_field(
+                *args, **kwargs)[self._base]
 
 class IOHandlerInMemory(BaseIOHandler):
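
For illustration, the ghost-zone handler above now just reuses the parent's reader and trims NGZ cells off every axis with the precomputed _base slices. A self-contained sketch of that pattern (BaseReader, the 6x6x6 array, and NGZ=1 are made-up stand-ins):

    import numpy as np

    class BaseReader:
        def _read_obj_field(self, obj, field, ctx=None):
            # Stand-in for the parent's HDF5 read: a full grid including ghost zones.
            return np.arange(6 * 6 * 6, dtype="float64").reshape((6, 6, 6))

    class GhostZoneReader(BaseReader):
        NGZ = 1
        _base = (slice(NGZ, -NGZ), slice(NGZ, -NGZ), slice(NGZ, -NGZ))

        def _read_obj_field(self, *args, **kwargs):
            # Same shape of override as the commit: read via the parent, then trim.
            return super(GhostZoneReader, self)._read_obj_field(*args, **kwargs)[self._base]

    full = BaseReader()._read_obj_field(None, None)
    trimmed = GhostZoneReader()._read_obj_field(None, None)
    print(full.shape, trimmed.shape)   # (6, 6, 6) (4, 4, 4)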
 


https://bitbucket.org/yt_analysis/yt/commits/094a1b76319e/
Changeset:   094a1b76319e
Branch:      yt
User:        MatthewTurk
Date:        2017-02-01 21:01:49+00:00
Summary:     Fixing flake8
Affected #:  1 file

diff -r 27683e3546923c460f9d2ebcbe796c63736ec92b -r 094a1b76319e4565232bae4eb904f467cc5de7d7 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -13,13 +13,9 @@
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
-import random
-from contextlib import contextmanager
-
 from yt.utilities.io_handler import \
     BaseIOHandler
 from yt.utilities.logger import ytLogger as mylog
-from yt.geometry.selection_routines import AlwaysSelector
 from yt.extern.six import b, iteritems
 from yt.utilities.on_demand_imports import _h5py as h5py
 


https://bitbucket.org/yt_analysis/yt/commits/bf2ed2cd50c1/
Changeset:   bf2ed2cd50c1
Branch:      yt
User:        MatthewTurk
Date:        2017-02-01 22:01:31+00:00
Summary:     Adding in a check for GridSelector to bypass selection call.
Affected #:  1 file

diff -r 094a1b76319e4565232bae4eb904f467cc5de7d7 -r bf2ed2cd50c1a7aee53a73f53e39e7e5fe82dbd4 yt/utilities/io_handler.py
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -129,6 +129,10 @@
         ind = {field: 0 for field in fields}
         for field, obj, data in self.io_iter(chunks, fields):
             if data is None: continue
+            if selector.__class__.__name__ == "GridSelector":
+                ind[field] += data.size
+                rv[field] = data.copy()
+                continue
             ind[field] += obj.select(selector, data, rv[field], ind[field])
         return rv
 


https://bitbucket.org/yt_analysis/yt/commits/bad4fbf78161/
Changeset:   bad4fbf78161
Branch:      yt
User:        MatthewTurk
Date:        2017-02-02 16:15:23+00:00
Summary:     Bumping version number for flash tests.

I was able to identify why this is necessary.  This set of changes allows
32-bit FLASH files to return 32-bit values.  Previously, these were upcast to
64-bit.  This new behavior is correct, so we bump.
Affected #:  1 file

diff -r bf2ed2cd50c1a7aee53a73f53e39e7e5fe82dbd4 -r bad4fbf7816122e5e2c2d868bebf26bb5536769f tests/tests.yaml
--- a/tests/tests.yaml
+++ b/tests/tests.yaml
@@ -14,7 +14,7 @@
   local_fits_001:
     - yt/frontends/fits/tests/test_outputs.py
 
-  local_flash_004:
+  local_flash_005:
     - yt/frontends/flash/tests/test_outputs.py
 
   local_gadget_001:


https://bitbucket.org/yt_analysis/yt/commits/ab1d31997e27/
Changeset:   ab1d31997e27
Branch:      yt
User:        MatthewTurk
Date:        2017-02-16 20:52:20+00:00
Summary:     Removing obsolete comments
Affected #:  1 file

diff -r bad4fbf7816122e5e2c2d868bebf26bb5536769f -r ab1d31997e275be6e02e0568ce6c4c8922772bda yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -73,8 +73,6 @@
         return (KeyError,)
 
     def _read_particle_coords(self, chunks, ptf):
-        # This is fun!  We can get rid of lots of duplicated code just by
-        # adding a conditional into _read_particle_fields and calling that.
         for rv in self._read_particle_fields(chunks, ptf, None):
             yield rv
 


https://bitbucket.org/yt_analysis/yt/commits/98c877e47f5c/
Changeset:   98c877e47f5c
Branch:      yt
User:        ngoldbaum
Date:        2017-04-10 20:41:35+00:00
Summary:     Merged in MatthewTurk/yt (pull request #2438)

Refactor Grid IO

Approved-by: John ZuHone <jzuhone at gmail.com>
Approved-by: Nathan Goldbaum <ngoldbau at illinois.edu>
Approved-by: yt-fido <yt.fido at gmail.com>
Affected #:  5 files

diff -r 0b3d4a49ffbbfa267c567f2d1e0ecec12dfccf90 -r 98c877e47f5ce8bbec42b24b5a5c1f1ed8782db3 tests/tests.yaml
--- a/tests/tests.yaml
+++ b/tests/tests.yaml
@@ -17,7 +17,7 @@
   local_fits_001:
     - yt/frontends/fits/tests/test_outputs.py
 
-  local_flash_004:
+  local_flash_005:
     - yt/frontends/flash/tests/test_outputs.py
 
   local_gadget_001:

diff -r 0b3d4a49ffbbfa267c567f2d1e0ecec12dfccf90 -r 98c877e47f5ce8bbec42b24b5a5c1f1ed8782db3 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -13,14 +13,10 @@
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
-import random
-from contextlib import contextmanager
-
 from yt.utilities.io_handler import \
     BaseIOHandler
 from yt.utilities.logger import ytLogger as mylog
-from yt.geometry.selection_routines import AlwaysSelector
-from yt.extern.six import u, b, iteritems
+from yt.extern.six import b, iteritems
 from yt.utilities.on_demand_imports import _h5py as h5py
 
 import numpy as np
@@ -77,33 +73,8 @@
         return (KeyError,)
 
     def _read_particle_coords(self, chunks, ptf):
-        chunks = list(chunks)
-        for chunk in chunks: # These should be organized by grid filename
-            f = None
-            for g in chunk.objs:
-                if g.filename is None: continue
-                if f is None:
-                    #print "Opening (count) %s" % g.filename
-                    f = h5py.File(g.filename, "r")
-                nap = sum(g.NumberOfActiveParticles.values())
-                if g.NumberOfParticles == 0 and nap == 0:
-                    continue
-                ds = f.get("/Grid%08i" % g.id)
-                for ptype, field_list in sorted(ptf.items()):
-                    if ptype != "io":
-                        if g.NumberOfActiveParticles[ptype] == 0: continue
-                        pds = ds.get("Particles/%s" % ptype)
-                    else:
-                        pds = ds
-                    pn = _particle_position_names.get(ptype,
-                            r"particle_position_%s")
-                    x, y, z = (np.asarray(pds.get(pn % ax).value, dtype="=f8")
-                               for ax in 'xyz')
-                    for field in field_list:
-                        if np.asarray(pds[field]).ndim > 1:
-                            self._array_fields[field] = pds[field].shape
-                    yield ptype, (x, y, z)
-            if f: f.close()
+        for rv in self._read_particle_fields(chunks, ptf, None):
+            yield rv
 
     def _read_particle_fields(self, chunks, ptf, selector):
         chunks = list(chunks)
@@ -128,6 +99,11 @@
                             r"particle_position_%s")
                     x, y, z = (np.asarray(pds.get(pn % ax).value, dtype="=f8")
                                for ax in 'xyz')
+                    if selector is None:
+                        # This only ever happens if the call is made from
+                        # _read_particle_coords.
+                        yield ptype, (x, y, z)
+                        continue
                     mask = selector.select_points(x, y, z, 0.0)
                     if mask is None: continue
                     for field in field_list:
@@ -137,168 +113,53 @@
                         yield (ptype, field), data[mask]
             if f: f.close()
 
-    def _read_fluid_selection(self, chunks, selector, fields, size):
-        rv = {}
-        # Now we have to do something unpleasant
-        chunks = list(chunks)
-        if selector.__class__.__name__ == "GridSelector":
-            if not (len(chunks) == len(chunks[0].objs) == 1):
-                raise RuntimeError
-            g = chunks[0].objs[0]
-            f = h5py.File(u(g.filename), 'r')
-            if g.id in self._cached_fields:
-                gf = self._cached_fields[g.id]
-                rv.update(gf)
-            if len(rv) == len(fields): return rv
-            gds = f.get("/Grid%08i" % g.id)
-            for field in fields:
-                if field in rv:
-                    self._hits += 1
-                    continue
-                self._misses += 1
-                ftype, fname = field
-                if fname in gds:
-                    rv[(ftype, fname)] = gds.get(fname).value.swapaxes(0, -1)
-                else:
-                    rv[(ftype, fname)] = np.zeros(g.ActiveDimensions)
-            if self._cache_on:
-                for gid in rv:
-                    self._cached_fields.setdefault(gid, {})
-                    self._cached_fields[gid].update(rv[gid])
-            f.close()
-            return rv
-        if size is None:
-            size = sum((g.count(selector) for chunk in chunks
-                        for g in chunk.objs))
-        for field in fields:
-            ftype, fname = field
-            fsize = size
-            rv[field] = np.empty(fsize, dtype="float64")
-        ng = sum(len(c.objs) for c in chunks)
-        mylog.debug("Reading %s cells of %s fields in %s grids",
-                   size, [f2 for f1, f2 in fields], ng)
-        ind = 0
-        h5_type = self._field_dtype
+    def io_iter(self, chunks, fields):
+        h5_dtype = self._field_dtype
         for chunk in chunks:
             fid = None
-            for g in chunk.objs:
-                if g.filename is None: continue
-                if fid is None:
-                    fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
-                gf = self._cached_fields.get(g.id, {})
-                data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
-                data_view = data.swapaxes(0, -1)
-                nd = 0
+            filename = -1
+            for obj in chunk.objs:
+                if obj.filename is None: continue
+                if obj.filename != filename:
+                    # Note one really important thing here: even if we do
+                    # implement LRU caching in the _read_obj_field function,
+                    # we'll still be doing file opening and whatnot.  This is a
+                    # problem, but one we can return to.
+                    if fid is not None:
+                        fid.close()
+                    fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
+                    filename = obj.filename
+                data = np.empty(obj.ActiveDimensions[::-1], dtype=h5_dtype)
                 for field in fields:
-                    if field in gf:
-                        nd = g.select(selector, gf[field], rv[field], ind)
-                        self._hits += 1
-                        continue
-                    self._misses += 1
-                    ftype, fname = field
-                    try:
-                        node = "/Grid%08i/%s" % (g.id, fname)
-                        dg = h5py.h5d.open(fid, b(node))
-                    except KeyError:
-                        if fname == "Dark_Matter_Density": continue
-                        raise
-                    dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
-                    if self._cache_on:
-                        self._cached_fields.setdefault(g.id, {})
-                        # Copy because it's a view into an empty temp array
-                        self._cached_fields[g.id][field] = data_view.copy()
-                    nd = g.select(selector, data_view, rv[field], ind) # caches
-                ind += nd
-            if fid: fid.close()
-        return rv
-
-    @contextmanager
-    def preload(self, chunk, fields, max_size):
-        if len(fields) == 0:
-            yield self
-            return
-        old_cache_on = self._cache_on
-        old_cached_fields = self._cached_fields
-        self._cached_fields = cf = {}
-        self._cache_on = True
-        for gid in old_cached_fields:
-            # Will not copy numpy arrays, which is good!
-            cf[gid] = old_cached_fields[gid].copy() 
-        self._hits = self._misses = 0
-        self._cached_fields = self._read_chunk_data(chunk, fields)
-        mylog.debug("(1st) Hits = % 10i Misses = % 10i",
-            self._hits, self._misses)
-        self._hits = self._misses = 0
-        yield self
-        mylog.debug("(2nd) Hits = % 10i Misses = % 10i",
-            self._hits, self._misses)
-        self._cached_fields = old_cached_fields
-        self._cache_on = old_cache_on
-        # Randomly remove some grids from the cache.  Note that we're doing
-        # this on a grid basis, not a field basis.  Performance will be
-        # slightly non-deterministic as a result of this, but it should roughly
-        # be statistically alright, assuming (as we do) that this will get
-        # called during largely unbalanced stuff.
-        if len(self._cached_fields) > max_size:
-            to_remove = random.sample(self._cached_fields.keys(),
-                len(self._cached_fields) - max_size)
-            mylog.debug("Purging from cache %s", len(to_remove))
-            for k in to_remove:
-                self._cached_fields.pop(k)
+                    yield field, obj, self._read_obj_field(obj, field, (fid, data))
+        if fid is not None:
+            fid.close()
+        
+    def _read_obj_field(self, obj, field, fid_data = None):
+        if fid_data is None: fid_data = (None, None)
+        fid, data = fid_data
+        if fid is None:
+            close = True
+            fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
         else:
-            mylog.warning("Cache size % 10i (max % 10i)",
-                len(self._cached_fields), max_size)
-
-    def _read_chunk_data(self, chunk, fields):
-        fid = fn = None
-        rv = {}
-        mylog.debug("Preloading fields %s", fields)
-        # Split into particles and non-particles
-        fluid_fields, particle_fields = [], []
-        for ftype, fname in fields:
-            if ftype in self.ds.particle_types:
-                particle_fields.append((ftype, fname))
-            else:
-                fluid_fields.append((ftype, fname))
-        if len(particle_fields) > 0:
-            selector = AlwaysSelector(self.ds)
-            rv.update(self._read_particle_selection(
-              [chunk], selector, particle_fields))
-        if len(fluid_fields) == 0: return rv
-        h5_type = self._field_dtype
-        for g in chunk.objs:
-            rv[g.id] = gf = {}
-            if g.id in self._cached_fields:
-                rv[g.id].update(self._cached_fields[g.id])
-            if g.filename is None: continue
-            elif g.filename != fn:
-                if fid is not None: fid.close()
-                fid = None
-            if fid is None:
-                fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
-                fn = g.filename
-            data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
-            data_view = data.swapaxes(0, -1)
-            for field in fluid_fields:
-                if field in gf:
-                    self._hits += 1
-                    continue
-                self._misses += 1
-                ftype, fname = field
-                try:
-                    node = "/Grid%08i/%s" % (g.id, fname)
-                    dg = h5py.h5d.open(fid, b(node))
-                except KeyError:
-                    if fname == "Dark_Matter_Density": continue
-                    raise
-                dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
-                gf[field] = data_view.copy()
-        if fid: fid.close()
-        if self._cache_on:
-            for gid in rv:
-                self._cached_fields.setdefault(gid, {})
-                self._cached_fields[gid].update(rv[gid])
-        return rv
+            close = False
+        if data is None:
+            data = np.empty(obj.ActiveDimensions[::-1],
+                            dtype=self._field_dtype)
+        ftype, fname = field
+        try:
+            node = "/Grid%08i/%s" % (obj.id, fname)
+            dg = h5py.h5d.open(fid, b(node))
+        except KeyError:
+            if fname == "Dark_Matter_Density": return None
+            raise
+        dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
+        # I don't know why, but on some installations of h5py this works, but
+        # on others, nope.  Doesn't seem to be a version thing.
+        #dg.close()
+        if close:
+            fid.close()
+        return data.T
 
 class IOHandlerPackedHDF5GhostZones(IOHandlerPackedHDF5):
     _dataset_type = "enzo_packed_3d_gz"
@@ -310,11 +171,9 @@
                       slice(NGZ, -NGZ),
                       slice(NGZ, -NGZ))
 
-    def _read_raw_data_set(self, grid, field):
-        f = h5py.File(grid.filename, "r")
-        ds = f["/Grid%08i/%s" % (grid.id, field)][:].swapaxes(0,2)
-        f.close()
-        return ds
+    def _read_obj_field(self, *args, **kwargs):
+        return super(IOHandlerPackedHDF5GhostZones, self)._read_obj_field(
+                *args, **kwargs)[self._base]
 
 class IOHandlerInMemory(BaseIOHandler):
 

diff -r 0b3d4a49ffbbfa267c567f2d1e0ecec12dfccf90 -r 98c877e47f5ce8bbec42b24b5a5c1f1ed8782db3 yt/frontends/flash/io.py
--- a/yt/frontends/flash/io.py
+++ b/yt/frontends/flash/io.py
@@ -18,7 +18,6 @@
 
 from yt.utilities.io_handler import \
     BaseIOHandler
-from yt.utilities.logger import ytLogger as mylog
 from yt.geometry.selection_routines import AlwaysSelector
 from yt.utilities.lib.geometry_utils import \
     compute_morton
@@ -61,6 +60,22 @@
             count_list, conv_factors):
         pass
 
+    def io_iter(self, chunks, fields):
+        f = self._handle
+        for chunk in chunks:
+            for field in fields:
+                # Note that we *prefer* to iterate over the fields on the
+                # outside; here, though, we're iterating over them on the
+                # inside because we may exhaust our chunks.
+                ftype, fname = field
+                ds = f["/%s" % fname]
+                for gs in grid_sequences(chunk.objs):
+                    start = gs[0].id - gs[0]._id_offset
+                    end = gs[-1].id - gs[-1]._id_offset + 1
+                    data = ds[start:end,:,:,:]
+                    for i, g in enumerate(gs):
+                        yield field, g, self._read_obj_field(g, field, (data, i))
+
     def _read_particle_coords(self, chunks, ptf):
         chunks = list(chunks)
         f_part = self._particle_handle
@@ -104,31 +119,18 @@
                     data = p_fields[start:end, fi]
                     yield (ptype, field), data[mask]
 
-    def _read_fluid_selection(self, chunks, selector, fields, size):
-        chunks = list(chunks)
-        if any((ftype != "flash" for ftype, fname in fields)):
-            raise NotImplementedError
-        f = self._handle
-        rv = {}
-        for field in fields:
-            ftype, fname = field
-            # Always use *native* 64-bit float.
-            rv[field] = np.empty(size, dtype="=f8")
-        ng = sum(len(c.objs) for c in chunks)
-        mylog.debug("Reading %s cells of %s fields in %s blocks",
-                    size, [f2 for f1, f2 in fields], ng)
-        for field in fields:
-            ftype, fname = field
-            ds = f["/%s" % fname]
-            ind = 0
-            for chunk in chunks:
-                for gs in grid_sequences(chunk.objs):
-                    start = gs[0].id - gs[0]._id_offset
-                    end = gs[-1].id - gs[-1]._id_offset + 1
-                    data = ds[start:end,:,:,:].transpose()
-                    for i, g in enumerate(gs):
-                        ind += g.select(selector, data[...,i], rv[field], ind)
-        return rv
+    def _read_obj_field(self, obj, field, ds_offset = None):
+        if ds_offset is None: ds_offset = (None, -1)
+        ds, offset = ds_offset
+        # our context here includes datasets and whatnot that are opened in the
+        # hdf5 file
+        if ds is None:
+            ds = self._handle["/%s" % field[1]]
+        if offset == -1:
+            data = ds[obj.id - obj._id_offset, :,:,:].transpose()
+        else:
+            data = ds[offset, :,:,:].transpose()
+        return data
 
     def _read_chunk_data(self, chunk, fields):
         f = self._handle

diff -r 0b3d4a49ffbbfa267c567f2d1e0ecec12dfccf90 -r 98c877e47f5ce8bbec42b24b5a5c1f1ed8782db3 yt/utilities/io_handler.py
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -20,16 +20,28 @@
 from yt.utilities.on_demand_imports import _h5py as h5py
 import numpy as np
 from yt.extern.six import add_metaclass
+from yt.utilities.lru_cache import \
+    local_lru_cache, _make_key
 
 _axis_ids = {0:2,1:1,2:0}
 
 io_registry = {}
 
+use_caching = 0
+
+def _make_io_key( args, *_args, **kwargs):
+    self, obj, field, ctx = args
+    # Ignore self because we have a self-specific cache
+    return _make_key((obj.id, field), *_args, **kwargs)
+
 class RegisteredIOHandler(type):
     def __init__(cls, name, b, d):
         type.__init__(cls, name, b, d)
         if hasattr(cls, "_dataset_type"):
             io_registry[cls._dataset_type] = cls
+        if use_caching and hasattr(cls, "_read_obj_field"):
+            cls._read_obj_field = local_lru_cache(maxsize=use_caching, 
+                    typed=True, make_key=_make_io_key)(cls._read_obj_field)
 
 @add_metaclass(RegisteredIOHandler)
 class BaseIOHandler(object):
@@ -54,7 +66,6 @@
 
     # We need a function for reading a list of sets
     # and a function for *popping* from a queue all the appropriate sets
-
     @contextmanager
     def preload(self, chunk, fields, max_size):
         yield self
@@ -87,7 +98,7 @@
             return return_val
         else:
             return False
-            
+
     def _read_data_set(self, grid, field):
         # check backup file first. if field not found,
         # call frontend-specific io method
@@ -108,6 +119,23 @@
     def _read_data(self, grid, field):
         pass
 
+    def _read_fluid_selection(self, chunks, selector, fields, size):
+        # This function has an interesting history.  It previously was mandate
+        # to be defined by all of the subclasses.  But, to avoid having to
+        # rewrite a whole bunch of IO handlers all at once, and to allow a
+        # better abstraction for grid-based frontends, we're now defining it in
+        # the base class.
+        rv = {field: np.empty(size, dtype="=f8") for field in fields} 
+        ind = {field: 0 for field in fields}
+        for field, obj, data in self.io_iter(chunks, fields):
+            if data is None: continue
+            if selector.__class__.__name__ == "GridSelector":
+                ind[field] += data.size
+                rv[field] = data.copy()
+                continue
+            ind[field] += obj.select(selector, data, rv[field], ind[field])
+        return rv
+
     def _read_data_slice(self, grid, field, axis, coord):
         sl = [slice(None), slice(None), slice(None)]
         sl[axis] = slice(coord, coord + 1)

diff -r 0b3d4a49ffbbfa267c567f2d1e0ecec12dfccf90 -r 98c877e47f5ce8bbec42b24b5a5c1f1ed8782db3 yt/utilities/lru_cache.py
--- a/yt/utilities/lru_cache.py
+++ b/yt/utilities/lru_cache.py
@@ -53,7 +53,7 @@
         return key[0]
     return _HashedSeq(key)
 
-def lru_cache(maxsize=100, typed=False):
+def lru_cache(maxsize=100, typed=False, make_key = _make_key):
     """Least-recently-used cache decorator.
     If *maxsize* is set to None, the LRU features are disabled and the cache
     can grow without bound.
@@ -77,7 +77,6 @@
         cache = dict()
         stats = [0, 0]                  # make statistics updateable non-locally
         HITS, MISSES = 0, 1             # names for the stats fields
-        make_key = _make_key
         cache_get = cache.get           # bound method to lookup key or return None
         _len = len                      # localize the global len() function
         lock = RLock()                  # because linkedlist updates aren't threadsafe
@@ -182,6 +181,8 @@
     return decorating_function
 ### End of backported lru_cache
 
+local_lru_cache = lru_cache
+
 if sys.version_info[:2] >= (3, 3):
     # 3.2 has an lru_cache with an incompatible API
     from functools import lru_cache

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.


