[Yt-svn] yt-commit r933 - in trunk/yt: . lagos

mturk at wrangler.dreamhost.com mturk at wrangler.dreamhost.com
Sat Nov 15 10:56:42 PST 2008


Author: mturk
Date: Sat Nov 15 10:56:42 2008
New Revision: 933
URL: http://yt.spacepope.org/changeset/933

Log:
Changes:

* Hierarchy reading in Enzo is now more efficient (could be better, and WILL be better)
* Changed the way the (parallel) projection joins its results across processors
* Fixed some config parsing for machines where you only have access to the current working directory (CWD) during a run

(These are mostly fixes for working in parallel on Kraken; later I'll be documenting
on the wiki how to generate a CNL-enabled, parallel-capable yt install.)



Modified:
   trunk/yt/config.py
   trunk/yt/lagos/BaseGridType.py
   trunk/yt/lagos/DataReadingFuncs.py
   trunk/yt/lagos/HierarchyType.py
   trunk/yt/lagos/ParallelTools.py

Modified: trunk/yt/config.py
==============================================================================
--- trunk/yt/config.py	(original)
+++ trunk/yt/config.py	Sat Nov 15 10:56:42 2008
@@ -27,6 +27,7 @@
 
 import ConfigParser, os, os.path, types
 
+
 ytcfgDefaults = {
     "fido":{
         'RunDir': os.path.join(os.getenv("HOME"),'.yt/EnzoRuns/'),
@@ -107,8 +108,12 @@
             raise KeyError
         self.set(item[0], item[1], val)
 
-ytcfg = YTConfigParser(['yt.cfg', os.path.expanduser('~/.yt/config')],
-                       ytcfgDefaults)
+if os.path.exists(os.path.expanduser("~/.yt/config")):
+    ytcfg = YTConfigParser(['yt.cfg', os.path.expanduser('~/.yt/config')],
+                           ytcfgDefaults)
+else:
+    ytcfg = YTConfigParser(['yt.cfg'],
+                        ytcfgDefaults)
 
 # Now we have parsed the config file.  Overrides come from the command line.
 

Modified: trunk/yt/lagos/BaseGridType.py
==============================================================================
--- trunk/yt/lagos/BaseGridType.py	(original)
+++ trunk/yt/lagos/BaseGridType.py	Sat Nov 15 10:56:42 2008
@@ -39,6 +39,7 @@
         self.fields = []
         self.start_index = None
         self.id = id
+        if (id % 1e4) == 0: mylog.debug("Prepared grid %s", id)
         if hierarchy: self.hierarchy = weakref.proxy(hierarchy)
         if filename: self.set_filename(filename)
         self.overlap_masks = [None, None, None]

Modified: trunk/yt/lagos/DataReadingFuncs.py
==============================================================================
--- trunk/yt/lagos/DataReadingFuncs.py	(original)
+++ trunk/yt/lagos/DataReadingFuncs.py	Sat Nov 15 10:56:42 2008
@@ -218,7 +218,7 @@
         sets = list(sets)
         for g in grids: files_keys[g.filename].append(g)
         for file in files_keys:
-            mylog.debug("Starting read %s", file)
+            mylog.debug("Starting read %s (%s)", file, sets)
             nodes = [g.id for g in files_keys[file]]
             nodes.sort()
             data = HDF5LightReader.ReadMultipleGrids(file, nodes, sets)

Modified: trunk/yt/lagos/HierarchyType.py
==============================================================================
--- trunk/yt/lagos/HierarchyType.py	(original)
+++ trunk/yt/lagos/HierarchyType.py	Sat Nov 15 10:56:42 2008
@@ -540,8 +540,7 @@
         self.data_style = data_style
         self.hierarchy_filename = os.path.abspath(pf.parameter_filename) \
                                + ".hierarchy"
-        self.__hierarchy_lines = open(self.hierarchy_filename).readlines()
-        if len(self.__hierarchy_lines) == 0:
+        if os.path.getsize(self.hierarchy_filename) == 0:
             raise IOError(-1,"File empty", self.hierarchy_filename)
         self.boundary_filename = os.path.abspath(pf.parameter_filename) \
                                + ".boundary"
@@ -549,11 +548,14 @@
         # Now we search backwards from the end of the file to find out how many
         # grids we have, which allows us to preallocate memory
         self.__hierarchy_string = open(self.hierarchy_filename).read()
-        for line in reversed(self.__hierarchy_lines):
-            if line.startswith("Grid ="):
-                self.num_grids = int(line.split("=")[-1])
+        for line in rlines(open(self.hierarchy_filename, "rb")):
+            if line.startswith("BaryonFileName") or \
+               line.startswith("FileName "):
+                testGrid = line.split("=")[-1].strip().rstrip()
+            if line.startswith("Grid "):
+                self.num_grids = testGridID = int(line.split("=")[-1])
                 break
-        self.__guess_data_style()
+        self.__guess_data_style(testGrid, testGridID)
         # For some reason, r8 seems to want Float64
         if pf.has_key("CompilerPrecision") \
             and pf["CompilerPrecision"] == "r4":
@@ -563,22 +565,15 @@
 
         AMRHierarchy.__init__(self, pf)
 
-        del self.__hierarchy_string, self.__hierarchy_lines
+        del self.__hierarchy_string 
 
     def _setup_classes(self):
         dd = self._get_data_reader_dict()
         self.grid = classobj("EnzoGrid",(EnzoGridBase,), dd)
         AMRHierarchy._setup_classes(self, dd)
 
-    def __guess_data_style(self):
+    def __guess_data_style(self, testGrid, testGridID):
         if self.data_style: return
-        for line in reversed(self.__hierarchy_lines):
-            if line.startswith("BaryonFileName") or \
-               line.startswith("FileName "):
-                testGrid = line.split("=")[-1].strip().rstrip()
-            if line.startswith("Grid "):
-                testGridID = int(line.split("=")[-1])
-                break
         if testGrid[0] != os.path.sep:
             testGrid = os.path.join(self.directory, testGrid)
         if not os.path.exists(testGrid):
@@ -651,14 +646,13 @@
             for v in vals.split():
                 toAdd[curGrid-1,j] = func(v)
                 j+=1
-        for line_index, line in enumerate(self.__hierarchy_lines):
+        for line_index, line in enumerate(open(self.hierarchy_filename)):
             # We can do this the slow, 'reliable' way by stripping
             # or we can manually pad all our strings, which speeds it up by a
             # factor of about ten
             #param, vals = map(strip,line.split("="))
             if (line_index % 1e5) == 0:
-                mylog.debug("Parsing line % 9i / % 9i",
-                            line_index, len(self.__hierarchy_lines))
+                mylog.debug("Parsing line % 9i", line_index)
             if len(line) < 2:
                 continue
             param, vals = line.split("=")
@@ -965,3 +959,47 @@
         rs += "(%s)\s*" % (scanf_regex[t])
     rs +="$"
     return re.compile(rs,re.M)
+
+# These next two functions are taken from
+# http://www.reddit.com/r/Python/comments/6hj75/reverse_file_iterator/c03vms4
+# Credit goes to "Brian" on Reddit
+
+def rblocks(f, blocksize=4096*256):
+    """Read file as series of blocks from end of file to start.
+
+    The data itself is in normal order, only the order of the blocks is reversed.
+    ie. "hello world" -> ["ld","wor", "lo ", "hel"]
+    Note that the file must be opened in binary mode.
+    """
+    if 'b' not in f.mode.lower():
+        raise Exception("File must be opened using binary mode.")
+    size = os.stat(f.name).st_size
+    fullblocks, lastblock = divmod(size, blocksize)
+
+    # The first(end of file) block will be short, since this leaves 
+    # the rest aligned on a blocksize boundary.  This may be more 
+    # efficient than having the last (first in file) block be short
+    f.seek(-lastblock,2)
+    yield f.read(lastblock)
+
+    for i in range(fullblocks-1,-1, -1):
+        f.seek(i * blocksize)
+        yield f.read(blocksize)
+
+def rlines(f, keepends=False):
+    """Iterate through the lines of a file in reverse order.
+
+    If keepends is true, line endings are kept as part of the line.
+    """
+    buf = ''
+    for block in rblocks(f):
+        buf = block + buf
+        lines = buf.splitlines(keepends)
+        # Return all lines except the first (since may be partial)
+        if lines:
+            lines.reverse()
+            buf = lines.pop() # Last line becomes end of new first line.
+            for line in lines:
+                yield line
+    yield buf  # First line.
+

Modified: trunk/yt/lagos/ParallelTools.py
==============================================================================
--- trunk/yt/lagos/ParallelTools.py	(original)
+++ trunk/yt/lagos/ParallelTools.py	Sat Nov 15 10:56:42 2008
@@ -28,7 +28,8 @@
 import yt.logger
 import itertools, sys
 
-if os.path.basename(sys.executable) in ["mpi4py"]:
+if os.path.basename(sys.executable) in ["mpi4py"] \
+    or "--parallel" in sys.argv:
     from mpi4py import MPI
     parallel_capable = (MPI.COMM_WORLD.size > 1)
     if parallel_capable:
@@ -163,7 +164,8 @@
         LE[y_dict[axis]] = y[0]
         RE[y_dict[axis]] = y[1]
 
-        return True, self.hierarchy.region(self.center, LE, RE)
+        reg = self.hierarchy.region(self.center, LE, RE)
+        return True, reg
 
     def _partition_hierarchy_3d(self, padding=0.0):
         if not parallel_capable:
@@ -190,12 +192,21 @@
     def _mpi_catdict(self, data):
         mylog.debug("Opening MPI Barrier on %s", MPI.COMM_WORLD.rank)
         MPI.COMM_WORLD.Barrier()
-        if MPI.COMM_WORLD.rank == 0:
-            data = self.__mpi_recvdict(data)
-        else:
-            MPI.COMM_WORLD.Send(data, dest=0, tag=0)
-        mylog.debug("Opening MPI Broadcast on %s", MPI.COMM_WORLD.rank)
-        data = MPI.COMM_WORLD.Bcast(data, root=0)
+        field_keys = data.keys()
+        field_keys.sort()
+        np = MPI.COMM_WORLD.size
+        for key in field_keys:
+            mylog.debug("Joining %s (%s) on %s", key, type(data[key]),
+                        MPI.COMM_WORLD.rank)
+            if MPI.COMM_WORLD.rank == 0:
+                data[key] = na.concatenate(
+                 [MPI.COMM_WORLD.Recv(source=i, tag=0) for i in range(1, np)],
+                    axis=-1)
+            else:
+                MPI.COMM_WORLD.Send(data[key], dest=0, tag=0)
+            MPI.COMM_WORLD.Barrier()
+            data[key] = MPI.COMM_WORLD.Bcast(data[key], root=0)
+        mylog.debug("Done joining dictionary on %s", MPI.COMM_WORLD.rank)
         MPI.COMM_WORLD.Barrier()
         return data
 



More information about the yt-svn mailing list