[yt-svn] commit/yt: 4 new changesets
commits-noreply at bitbucket.org
commits-noreply at bitbucket.org
Wed Apr 2 07:27:19 PDT 2014
4 new commits in yt:
https://bitbucket.org/yt_analysis/yt/commits/f1c3c7da2ec2/
Changeset: f1c3c7da2ec2
Branch: yt-3.0
User: MatthewTurk
Date: 2014-03-28 22:06:57
Summary: Implementing local_only for IO chunking.
This helps with inline datasets. However, it needs to be threaded through the
other frontends.
Affected #: 4 files
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/data_objects/construction_data_containers.py
--- a/yt/data_objects/construction_data_containers.py
+++ b/yt/data_objects/construction_data_containers.py
@@ -273,12 +273,12 @@
chunk_fields.append(self.weight_field)
tree = self._get_tree(len(fields))
# We do this once
- for chunk in self.data_source.chunks([], "io"):
+ for chunk in self.data_source.chunks([], "io", local_only = False):
self._initialize_chunk(chunk, tree)
# This needs to be parallel_objects-ified
with self.data_source._field_parameter_state(self.field_parameters):
for chunk in parallel_objects(self.data_source.chunks(
- chunk_fields, "io")):
+ chunk_fields, "io", local_only = True)):
mylog.debug("Adding chunk (%s) to tree (%0.3e GB RAM)", chunk.ires.size,
get_memory_usage()/1024.)
self._handle_chunk(chunk, fields, tree)
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/frontends/enzo/data_structures.py
--- a/yt/frontends/enzo/data_structures.py
+++ b/yt/frontends/enzo/data_structures.py
@@ -32,6 +32,8 @@
AMRGridPatch
from yt.geometry.grid_geometry_handler import \
GridIndex
+from yt.geometry.geometry_handler import \
+ YTDataChunk
from yt.data_objects.static_output import \
Dataset
from yt.fields.field_info_container import \
@@ -562,7 +564,7 @@
self.grids = np.empty(len(grids), dtype='object')
for i, grid in enumerate(grids):
if (i%1e4) == 0: mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
- grid.filename = None
+ grid.filename = "Inline_processor_%07i" % (self.grid_procs[i,0])
grid._prepare_grid()
grid.proc_num = self.grid_procs[i,0]
self.grids[i] = grid
@@ -600,6 +602,20 @@
random_sample = np.mgrid[0:max(len(my_grids)-1,1)].astype("int32")
return my_grids[(random_sample,)]
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ gfiles = defaultdict(list)
+ gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
+ for g in gobjs:
+ gfiles[g.filename].append(g)
+ for fn in sorted(gfiles):
+ if local_only:
+ gobjs = [g for g in gfiles[fn] if g.proc_num == self.comm.rank]
+ gfiles[fn] = gobjs
+ gs = gfiles[fn]
+ count = self._count_selection(dobj, gs)
+ yield YTDataChunk(dobj, "io", gs, count, cache = cache)
+
+
class EnzoHierarchy1D(EnzoHierarchy):
def _fill_arrays(self, ei, si, LE, RE, npart, nap):
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -250,7 +250,6 @@
if size is None:
size = sum((g.count(selector) for chunk in chunks
for g in chunk.objs))
-
for field in fields:
ftype, fname = field
fsize = size
@@ -262,11 +261,14 @@
ind = 0
for chunk in chunks:
for g in chunk.objs:
- if g.id not in self.grids_in_memory: continue
+ # We want a *hard error* here.
+ #if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
data_view = self.grids_in_memory[g.id][fname][self.my_slice]
- ind += g.select(selector, data_view, rv[field], ind)
+ nd = g.select(selector, data_view, rv[field], ind)
+ ind += nd
+ assert(ind == fsize)
return rv
def _read_particle_coords(self, chunks, ptf):
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -317,7 +317,9 @@
# individual grids.
yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ # local_only is only useful for inline datasets and requires
+ # implementation by subclasses.
gfiles = defaultdict(list)
gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for g in gobjs:
https://bitbucket.org/yt_analysis/yt/commits/9d3e991ddcec/
Changeset: 9d3e991ddcec
Branch: yt-3.0
User: samskillman
Date: 2014-03-28 22:12:49
Summary: A swapaxes is needed here
Affected #: 1 file
diff -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 -r 9d3e991ddceccb3db33572517c03a36fd0b22add yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -265,7 +265,7 @@
#if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
- data_view = self.grids_in_memory[g.id][fname][self.my_slice]
+ data_view = self.grids_in_memory[g.id][fname][self.my_slice].swapaxes(0,2)
nd = g.select(selector, data_view, rv[field], ind)
ind += nd
assert(ind == fsize)
https://bitbucket.org/yt_analysis/yt/commits/bb5982edde6a/
Changeset: bb5982edde6a
Branch: yt-3.0
User: samskillman
Date: 2014-03-28 22:53:01
Summary: Threading through local_only=False to all other frontends with chunk_io.
Affected #: 4 files
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/art/data_structures.py
--- a/yt/frontends/art/data_structures.py
+++ b/yt/frontends/art/data_structures.py
@@ -152,7 +152,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
"""
Since subsets are calculated per domain,
i.e. per file, yield each domain at a time to
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/artio/data_structures.py
--- a/yt/frontends/artio/data_structures.py
+++ b/yt/frontends/artio/data_structures.py
@@ -297,7 +297,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None, cache = True)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
# _current_chunk is made from identify_base_chunk
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for chunk in oobjs:
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/ramses/data_structures.py
--- a/yt/frontends/ramses/data_structures.py
+++ b/yt/frontends/ramses/data_structures.py
@@ -432,7 +432,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/stream/data_structures.py
--- a/yt/frontends/stream/data_structures.py
+++ b/yt/frontends/stream/data_structures.py
@@ -1300,7 +1300,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g])
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
https://bitbucket.org/yt_analysis/yt/commits/2ef55a7a3a8b/
Changeset: 2ef55a7a3a8b
Branch: yt-3.0
User: MatthewTurk
Date: 2014-04-02 16:27:12
Summary: Merged in samskillman/yt/yt-3.0 (pull request #778)
Inline chunking
Affected #: 8 files
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/data_objects/construction_data_containers.py
--- a/yt/data_objects/construction_data_containers.py
+++ b/yt/data_objects/construction_data_containers.py
@@ -273,12 +273,12 @@
chunk_fields.append(self.weight_field)
tree = self._get_tree(len(fields))
# We do this once
- for chunk in self.data_source.chunks([], "io"):
+ for chunk in self.data_source.chunks([], "io", local_only = False):
self._initialize_chunk(chunk, tree)
# This needs to be parallel_objects-ified
with self.data_source._field_parameter_state(self.field_parameters):
for chunk in parallel_objects(self.data_source.chunks(
- chunk_fields, "io")):
+ chunk_fields, "io", local_only = True)):
mylog.debug("Adding chunk (%s) to tree (%0.3e GB RAM)", chunk.ires.size,
get_memory_usage()/1024.)
self._handle_chunk(chunk, fields, tree)
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/art/data_structures.py
--- a/yt/frontends/art/data_structures.py
+++ b/yt/frontends/art/data_structures.py
@@ -152,7 +152,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
"""
Since subsets are calculated per domain,
i.e. per file, yield each domain at a time to
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/artio/data_structures.py
--- a/yt/frontends/artio/data_structures.py
+++ b/yt/frontends/artio/data_structures.py
@@ -297,7 +297,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None, cache = True)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
# _current_chunk is made from identify_base_chunk
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for chunk in oobjs:
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/enzo/data_structures.py
--- a/yt/frontends/enzo/data_structures.py
+++ b/yt/frontends/enzo/data_structures.py
@@ -32,6 +32,8 @@
AMRGridPatch
from yt.geometry.grid_geometry_handler import \
GridIndex
+from yt.geometry.geometry_handler import \
+ YTDataChunk
from yt.data_objects.static_output import \
Dataset
from yt.fields.field_info_container import \
@@ -562,7 +564,7 @@
self.grids = np.empty(len(grids), dtype='object')
for i, grid in enumerate(grids):
if (i%1e4) == 0: mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
- grid.filename = None
+ grid.filename = "Inline_processor_%07i" % (self.grid_procs[i,0])
grid._prepare_grid()
grid.proc_num = self.grid_procs[i,0]
self.grids[i] = grid
@@ -600,6 +602,20 @@
random_sample = np.mgrid[0:max(len(my_grids)-1,1)].astype("int32")
return my_grids[(random_sample,)]
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ gfiles = defaultdict(list)
+ gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
+ for g in gobjs:
+ gfiles[g.filename].append(g)
+ for fn in sorted(gfiles):
+ if local_only:
+ gobjs = [g for g in gfiles[fn] if g.proc_num == self.comm.rank]
+ gfiles[fn] = gobjs
+ gs = gfiles[fn]
+ count = self._count_selection(dobj, gs)
+ yield YTDataChunk(dobj, "io", gs, count, cache = cache)
+
+
class EnzoHierarchy1D(EnzoHierarchy):
def _fill_arrays(self, ei, si, LE, RE, npart, nap):
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -250,7 +250,6 @@
if size is None:
size = sum((g.count(selector) for chunk in chunks
for g in chunk.objs))
-
for field in fields:
ftype, fname = field
fsize = size
@@ -262,11 +261,14 @@
ind = 0
for chunk in chunks:
for g in chunk.objs:
- if g.id not in self.grids_in_memory: continue
+ # We want a *hard error* here.
+ #if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
- data_view = self.grids_in_memory[g.id][fname][self.my_slice]
- ind += g.select(selector, data_view, rv[field], ind)
+ data_view = self.grids_in_memory[g.id][fname][self.my_slice].swapaxes(0,2)
+ nd = g.select(selector, data_view, rv[field], ind)
+ ind += nd
+ assert(ind == fsize)
return rv
def _read_particle_coords(self, chunks, ptf):
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/ramses/data_structures.py
--- a/yt/frontends/ramses/data_structures.py
+++ b/yt/frontends/ramses/data_structures.py
@@ -432,7 +432,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/stream/data_structures.py
--- a/yt/frontends/stream/data_structures.py
+++ b/yt/frontends/stream/data_structures.py
@@ -1300,7 +1300,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g])
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -317,7 +317,9 @@
# individual grids.
yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ # local_only is only useful for inline datasets and requires
+ # implementation by subclasses.
gfiles = defaultdict(list)
gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for g in gobjs:
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this message because the commit notification service is enabled for
this recipient.
More information about the yt-svn
mailing list