[yt-svn] commit/yt: 4 new changesets
commits-noreply at bitbucket.org
commits-noreply at bitbucket.org
Wed Apr 2 07:27:19 PDT 2014
4 new commits in yt:
https://bitbucket.org/yt_analysis/yt/commits/f1c3c7da2ec2/
Changeset: f1c3c7da2ec2
Branch: yt-3.0
User: MatthewTurk
Date: 2014-03-28 22:06:57
Summary: Implementing local_only for IO chunking.
This helps with inline datasets. However, it needs to be threaded through the
other frontends.
Affected #: 4 files
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/data_objects/construction_data_containers.py
--- a/yt/data_objects/construction_data_containers.py
+++ b/yt/data_objects/construction_data_containers.py
@@ -273,12 +273,12 @@
chunk_fields.append(self.weight_field)
tree = self._get_tree(len(fields))
# We do this once
- for chunk in self.data_source.chunks([], "io"):
+ for chunk in self.data_source.chunks([], "io", local_only = False):
self._initialize_chunk(chunk, tree)
# This needs to be parallel_objects-ified
with self.data_source._field_parameter_state(self.field_parameters):
for chunk in parallel_objects(self.data_source.chunks(
- chunk_fields, "io")):
+ chunk_fields, "io", local_only = True)):
mylog.debug("Adding chunk (%s) to tree (%0.3e GB RAM)", chunk.ires.size,
get_memory_usage()/1024.)
self._handle_chunk(chunk, fields, tree)
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/frontends/enzo/data_structures.py
--- a/yt/frontends/enzo/data_structures.py
+++ b/yt/frontends/enzo/data_structures.py
@@ -32,6 +32,8 @@
AMRGridPatch
from yt.geometry.grid_geometry_handler import \
GridIndex
+from yt.geometry.geometry_handler import \
+ YTDataChunk
from yt.data_objects.static_output import \
Dataset
from yt.fields.field_info_container import \
@@ -562,7 +564,7 @@
self.grids = np.empty(len(grids), dtype='object')
for i, grid in enumerate(grids):
if (i%1e4) == 0: mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
- grid.filename = None
+ grid.filename = "Inline_processor_%07i" % (self.grid_procs[i,0])
grid._prepare_grid()
grid.proc_num = self.grid_procs[i,0]
self.grids[i] = grid
@@ -600,6 +602,20 @@
random_sample = np.mgrid[0:max(len(my_grids)-1,1)].astype("int32")
return my_grids[(random_sample,)]
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ gfiles = defaultdict(list)
+ gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
+ for g in gobjs:
+ gfiles[g.filename].append(g)
+ for fn in sorted(gfiles):
+ if local_only:
+ gobjs = [g for g in gfiles[fn] if g.proc_num == self.comm.rank]
+ gfiles[fn] = gobjs
+ gs = gfiles[fn]
+ count = self._count_selection(dobj, gs)
+ yield YTDataChunk(dobj, "io", gs, count, cache = cache)
+
+
class EnzoHierarchy1D(EnzoHierarchy):
def _fill_arrays(self, ei, si, LE, RE, npart, nap):
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -250,7 +250,6 @@
if size is None:
size = sum((g.count(selector) for chunk in chunks
for g in chunk.objs))
-
for field in fields:
ftype, fname = field
fsize = size
@@ -262,11 +261,14 @@
ind = 0
for chunk in chunks:
for g in chunk.objs:
- if g.id not in self.grids_in_memory: continue
+ # We want a *hard error* here.
+ #if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
data_view = self.grids_in_memory[g.id][fname][self.my_slice]
- ind += g.select(selector, data_view, rv[field], ind)
+ nd = g.select(selector, data_view, rv[field], ind)
+ ind += nd
+ assert(ind == fsize)
return rv
def _read_particle_coords(self, chunks, ptf):
diff -r 19470c49dd8c1c5c66ee7a8bd638940761b5d3e9 -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -317,7 +317,9 @@
# individual grids.
yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ # local_only is only useful for inline datasets and requires
+ # implementation by subclasses.
gfiles = defaultdict(list)
gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for g in gobjs:
https://bitbucket.org/yt_analysis/yt/commits/9d3e991ddcec/
Changeset: 9d3e991ddcec
Branch: yt-3.0
User: samskillman
Date: 2014-03-28 22:12:49
Summary: A swapaxes is needed here
Affected #: 1 file
diff -r f1c3c7da2ec2daf4c3abb2db76f448bc7ee5a549 -r 9d3e991ddceccb3db33572517c03a36fd0b22add yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -265,7 +265,7 @@
#if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
- data_view = self.grids_in_memory[g.id][fname][self.my_slice]
+ data_view = self.grids_in_memory[g.id][fname][self.my_slice].swapaxes(0,2)
nd = g.select(selector, data_view, rv[field], ind)
ind += nd
assert(ind == fsize)
https://bitbucket.org/yt_analysis/yt/commits/bb5982edde6a/
Changeset: bb5982edde6a
Branch: yt-3.0
User: samskillman
Date: 2014-03-28 22:53:01
Summary: Threading through local_only=False to all other frontends with chunk_io.
Affected #: 4 files
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/art/data_structures.py
--- a/yt/frontends/art/data_structures.py
+++ b/yt/frontends/art/data_structures.py
@@ -152,7 +152,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
"""
Since subsets are calculated per domain,
i.e. per file, yield each domain at a time to
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/artio/data_structures.py
--- a/yt/frontends/artio/data_structures.py
+++ b/yt/frontends/artio/data_structures.py
@@ -297,7 +297,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None, cache = True)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
# _current_chunk is made from identify_base_chunk
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for chunk in oobjs:
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/ramses/data_structures.py
--- a/yt/frontends/ramses/data_structures.py
+++ b/yt/frontends/ramses/data_structures.py
@@ -432,7 +432,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
diff -r 9d3e991ddceccb3db33572517c03a36fd0b22add -r bb5982edde6a22ea7f80a99613de70417eeed473 yt/frontends/stream/data_structures.py
--- a/yt/frontends/stream/data_structures.py
+++ b/yt/frontends/stream/data_structures.py
@@ -1300,7 +1300,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g])
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
https://bitbucket.org/yt_analysis/yt/commits/2ef55a7a3a8b/
Changeset: 2ef55a7a3a8b
Branch: yt-3.0
User: MatthewTurk
Date: 2014-04-02 16:27:12
Summary: Merged in samskillman/yt/yt-3.0 (pull request #778)
Inline chunking
Affected #: 8 files
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/data_objects/construction_data_containers.py
--- a/yt/data_objects/construction_data_containers.py
+++ b/yt/data_objects/construction_data_containers.py
@@ -273,12 +273,12 @@
chunk_fields.append(self.weight_field)
tree = self._get_tree(len(fields))
# We do this once
- for chunk in self.data_source.chunks([], "io"):
+ for chunk in self.data_source.chunks([], "io", local_only = False):
self._initialize_chunk(chunk, tree)
# This needs to be parallel_objects-ified
with self.data_source._field_parameter_state(self.field_parameters):
for chunk in parallel_objects(self.data_source.chunks(
- chunk_fields, "io")):
+ chunk_fields, "io", local_only = True)):
mylog.debug("Adding chunk (%s) to tree (%0.3e GB RAM)", chunk.ires.size,
get_memory_usage()/1024.)
self._handle_chunk(chunk, fields, tree)
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/art/data_structures.py
--- a/yt/frontends/art/data_structures.py
+++ b/yt/frontends/art/data_structures.py
@@ -152,7 +152,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
"""
Since subsets are calculated per domain,
i.e. per file, yield each domain at a time to
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/artio/data_structures.py
--- a/yt/frontends/artio/data_structures.py
+++ b/yt/frontends/artio/data_structures.py
@@ -297,7 +297,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None, cache = True)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
# _current_chunk is made from identify_base_chunk
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for chunk in oobjs:
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/enzo/data_structures.py
--- a/yt/frontends/enzo/data_structures.py
+++ b/yt/frontends/enzo/data_structures.py
@@ -32,6 +32,8 @@
AMRGridPatch
from yt.geometry.grid_geometry_handler import \
GridIndex
+from yt.geometry.geometry_handler import \
+ YTDataChunk
from yt.data_objects.static_output import \
Dataset
from yt.fields.field_info_container import \
@@ -562,7 +564,7 @@
self.grids = np.empty(len(grids), dtype='object')
for i, grid in enumerate(grids):
if (i%1e4) == 0: mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
- grid.filename = None
+ grid.filename = "Inline_processor_%07i" % (self.grid_procs[i,0])
grid._prepare_grid()
grid.proc_num = self.grid_procs[i,0]
self.grids[i] = grid
@@ -600,6 +602,20 @@
random_sample = np.mgrid[0:max(len(my_grids)-1,1)].astype("int32")
return my_grids[(random_sample,)]
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ gfiles = defaultdict(list)
+ gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
+ for g in gobjs:
+ gfiles[g.filename].append(g)
+ for fn in sorted(gfiles):
+ if local_only:
+ gobjs = [g for g in gfiles[fn] if g.proc_num == self.comm.rank]
+ gfiles[fn] = gobjs
+ gs = gfiles[fn]
+ count = self._count_selection(dobj, gs)
+ yield YTDataChunk(dobj, "io", gs, count, cache = cache)
+
+
class EnzoHierarchy1D(EnzoHierarchy):
def _fill_arrays(self, ei, si, LE, RE, npart, nap):
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/enzo/io.py
--- a/yt/frontends/enzo/io.py
+++ b/yt/frontends/enzo/io.py
@@ -250,7 +250,6 @@
if size is None:
size = sum((g.count(selector) for chunk in chunks
for g in chunk.objs))
-
for field in fields:
ftype, fname = field
fsize = size
@@ -262,11 +261,14 @@
ind = 0
for chunk in chunks:
for g in chunk.objs:
- if g.id not in self.grids_in_memory: continue
+ # We want a *hard error* here.
+ #if g.id not in self.grids_in_memory: continue
for field in fields:
ftype, fname = field
- data_view = self.grids_in_memory[g.id][fname][self.my_slice]
- ind += g.select(selector, data_view, rv[field], ind)
+ data_view = self.grids_in_memory[g.id][fname][self.my_slice].swapaxes(0,2)
+ nd = g.select(selector, data_view, rv[field], ind)
+ ind += nd
+ assert(ind == fsize)
return rv
def _read_particle_coords(self, chunks, ptf):
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/ramses/data_structures.py
--- a/yt/frontends/ramses/data_structures.py
+++ b/yt/frontends/ramses/data_structures.py
@@ -432,7 +432,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g], None)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/frontends/stream/data_structures.py
--- a/yt/frontends/stream/data_structures.py
+++ b/yt/frontends/stream/data_structures.py
@@ -1300,7 +1300,7 @@
g = og
yield YTDataChunk(dobj, "spatial", [g])
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for subset in oobjs:
yield YTDataChunk(dobj, "io", [subset], None, cache = cache)
diff -r 47732ca13d1b4ca5d650f7c31e43a84370f4e52a -r 2ef55a7a3a8b8ba66fd8f68c31b744b91ff361ee yt/geometry/grid_geometry_handler.py
--- a/yt/geometry/grid_geometry_handler.py
+++ b/yt/geometry/grid_geometry_handler.py
@@ -317,7 +317,9 @@
# individual grids.
yield YTDataChunk(dobj, "spatial", [g], size, cache = False)
- def _chunk_io(self, dobj, cache = True):
+ def _chunk_io(self, dobj, cache = True, local_only = False):
+ # local_only is only useful for inline datasets and requires
+ # implementation by subclasses.
gfiles = defaultdict(list)
gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
for g in gobjs:
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this message because the commit notification service is enabled for
this recipient.
More information about the yt-svn
mailing list