[yt-svn] commit/yt: 3 new changesets
Bitbucket
commits-noreply at bitbucket.org
Thu Jul 26 10:52:30 PDT 2012
3 new commits in yt:
https://bitbucket.org/yt_analysis/yt/changeset/a6136e2be34f/
changeset: a6136e2be34f
branch: yt
user: MatthewTurk
date: 2012-07-26 15:45:54
summary: Allow TimeSeriesData.from_filenames to accept a string, in which case it will
glob and sort the string.
affected #: 1 file
diff -r b59774e854fa4233250735434264b55743d630df -r a6136e2be34f4e1a0933b35b39ccef3100740bcd yt/data_objects/time_series.py
--- a/yt/data_objects/time_series.py
+++ b/yt/data_objects/time_series.py
@@ -23,7 +23,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
-import inspect, functools, weakref
+import inspect, functools, weakref, glob, types
from yt.funcs import *
from yt.convenience import load
@@ -144,6 +144,9 @@
@classmethod
def from_filenames(cls, filename_list, parallel = True):
+ if isinstance(filename_list, types.StringTypes):
+ filename_list = glob.glob(filename_list)
+ filename_list.sort()
obj = cls(filename_list[:], parallel = parallel)
return obj
https://bitbucket.org/yt_analysis/yt/changeset/4e073a8c644d/
changeset: 4e073a8c644d
branch: yt
user: MatthewTurk
date: 2012-07-26 16:57:09
summary: Adding docstring to piter and __init__ for TimeSeriesData, and for
parallel_objects.
affected #: 2 files
diff -r a6136e2be34f4e1a0933b35b39ccef3100740bcd -r 4e073a8c644d21fb5c0a770e59fc07d02e57d363 yt/data_objects/time_series.py
--- a/yt/data_objects/time_series.py
+++ b/yt/data_objects/time_series.py
@@ -79,6 +79,27 @@
class TimeSeriesData(object):
def __init__(self, outputs, parallel = True):
+ r"""The TimeSeriesData object is a container of multiple datasets,
+ allowing easy iteration and computation on them.
+
+ TimeSeriesData objects are designed to provide easy ways to access,
+ analyze, parallelize and visualize multiple datasets sequentially. This is
+ primarily expressed through iteration, but can also be constructed via
+ analysis tasks (see :ref:`time-series-analysis`).
+
+ The best method to construct TimeSeriesData objects is through
+ :meth:`~yt.data_objects.time_series.TimeSeriesData.from_filenames`.
+
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
self.tasks = AnalysisTaskProxy(self)
self.params = TimeSeriesParametersContainer(self)
self._pre_outputs = outputs[:]
@@ -110,6 +131,65 @@
return len(self._pre_outputs)
def piter(self, storage = None):
+ r"""Iterate over time series components in parallel.
+
+ This allows you to iterate over a time series while dispatching
+ individual components of that time series to different processors or
+ processor groups. If the parallelism strategy was set to be
+ multi-processor (by "parallel = N" where N is an integer when the
+ TimeSeriesData was created) this will issue each dataset to an
+ N-processor group. For instance, this would allow you to start a 1024
+ processor job, loading up 100 datasets in a time series and creating 8
+ processor groups of 128 processors each, each of which would be
+ assigned a different dataset. This could be accomplished as shown in
+ the examples below. The *storage* option is as seen in
+ :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
+ which is a mechanism for storing results of analysis on an individual
+ dataset and then combining the results at the end, so that the entire
+ set of processors have access to those results.
+
+ Note that supplying a *storage* changes the iteration mechanism; see
+ below.
+
+ Parameters
+ ----------
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+
+ Examples
+ --------
+ Here is an example of iteration when the results do not need to be
+ stored. One processor will be assigned to each parameter file.
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> for pf in ts.piter():
+ ... SlicePlot(pf, "x", "Density").save()
+ ...
+
+ This demonstrates how one might store results:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> storage = {}
+ >>> for sto, pf in ts.piter():
+ ... v, c = pf.h.find_max("Density")
+ ... sto.result = (v, c)
+ ...
+ >>> for i, (v, c) in sorted(storage.items()):
+ ... print "% 4i %0.3e" % (i, v)
+ ...
+
+ This shows how to dispatch 4 processors to each dataset:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy",
+ ... parallel = 4)
+ >>> for pf in ts.piter():
+ ... ProjectionPlot(pf, "x", "Density").save()
+ ...
+
+ """
dynamic = False
if self.parallel == False:
njobs = 1
@@ -143,11 +223,44 @@
return [v for k, v in sorted(return_values.items())]
@classmethod
- def from_filenames(cls, filename_list, parallel = True):
- if isinstance(filename_list, types.StringTypes):
- filename_list = glob.glob(filename_list)
- filename_list.sort()
- obj = cls(filename_list[:], parallel = parallel)
+ def from_filenames(cls, filenames, parallel = True):
+ r"""Create a time series from either a filename pattern or a list of
+ filenames.
+
+ This method provides an easy way to create a
+ :class:`~yt.data_objects.time_series.TimeSeriesData`, given a set of
+ filenames or a pattern that matches them. Additionally, it can set the
+ parallelism strategy.
+
+ Parameters
+ ----------
+ filenames : list or pattern
+ This can either be a list of filenames (such as ["DD0001/DD0001",
+ "DD0002/DD0002"]) or a pattern to match, such as
+ "DD*/DD*.hierarchy"). If it's the former, they will be loaded in
+ order. The latter will be identified with the glob module and then
+ sorted.
+ parallel : True, False or int
+ This parameter governs the behavior when .piter() is called on the
+ resultant TimeSeriesData object. If this is set to False, the time
+ series will not iterate in parallel when .piter() is called. If
+ this is set to either True or an integer, it will be iterated with
+ 1 or that integer number of processors assigned to each parameter
+ file provided to the loop.
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
+ if isinstance(filenames, types.StringTypes):
+ filenames = glob.glob(filenames)
+ filenames.sort()
+ obj = cls(filenames[:], parallel = parallel)
return obj
@classmethod
diff -r a6136e2be34f4e1a0933b35b39ccef3100740bcd -r 4e073a8c644d21fb5c0a770e59fc07d02e57d363 yt/utilities/parallel_tools/parallel_analysis_interface.py
--- a/yt/utilities/parallel_tools/parallel_analysis_interface.py
+++ b/yt/utilities/parallel_tools/parallel_analysis_interface.py
@@ -344,6 +344,67 @@
def parallel_objects(objects, njobs = 0, storage = None, barrier = True,
dynamic = False):
+ r"""This function dispatches components of an iterable to different
+ processors.
+
+ The parallel_objects function accepts an iterable, *objects*, and based on
+ the number of jobs requested and number of available processors, decides
+ how to dispatch individual objects to processors or sets of processors.
+ This can implicitly include multi-level parallelism, such that the
+ processor groups assigned each object can be composed of several or even
+ hundreds of processors. *storage* is also available, for collation of
+ results at the end of the iteration loop.
+
+ Calls to this function can be nested.
+
+ This should not be used to iterate over parameter files --
+ :class:`~yt.data_objects.time_series.TimeSeriesData` provides a much nicer
+ interface for that.
+
+ Parameters
+ ----------
+ objects : iterable
+ The list of objects to dispatch to different processors.
+ njobs : int
+ How many jobs to spawn. By default, one job will be dispatched for
+ each available processor.
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+ barrier : bool
+ Should a barrier be placed at the end of iteration?
+ dynamic : bool
+ This governs whether or not dynamic load balancing will be enabled.
+ This requires one dedicated processor; if this is enabled with a set of
+ 128 processors available, only 127 will be available to iterate over
+ objects as one will be load balancing the rest.
+
+
+ Examples
+ --------
+ Here is a simple example of iterating over a set of centers and making
+ slice plots centered at each.
+
+ >>> for c in parallel_objects(centers):
+ ... SlicePlot(pf, "x", "Density", center = c).save()
+ ...
+
+ Here's an example of calculating the angular momentum vector of a set of
+ spheres, but with a set of four jobs of multiple processors each. Note
+ that we also store the results.
+
+ >>> storage = {}
+ >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage):
+ ... sp = pf.h.sphere(c, (100, "kpc"))
+ ... sto.result = sp.quantities["AngularMomentumVector"]()
+ ...
+ >>> for sphere_id, L in sorted(storage.items()):
+ ... print c[sphere_id], L
+ ...
+
+ """
if dynamic:
from .task_queue import dynamic_parallel_objects
for my_obj in dynamic_parallel_objects(objects, njobs=njobs,
https://bitbucket.org/yt_analysis/yt/changeset/bdc61310201c/
changeset: bdc61310201c
branch: yt
user: jsoishi
date: 2012-07-26 19:52:28
summary: Merged in MatthewTurk/yt (pull request #218)
affected #: 2 files
diff -r 42fdcc32a605b942ffa55aa04002680c3584d31f -r bdc61310201c3161b59d16efac9194283cfaf53f yt/data_objects/time_series.py
--- a/yt/data_objects/time_series.py
+++ b/yt/data_objects/time_series.py
@@ -23,7 +23,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
-import inspect, functools, weakref
+import inspect, functools, weakref, glob, types
from yt.funcs import *
from yt.convenience import load
@@ -79,6 +79,27 @@
class TimeSeriesData(object):
def __init__(self, outputs, parallel = True):
+ r"""The TimeSeriesData object is a container of multiple datasets,
+ allowing easy iteration and computation on them.
+
+ TimeSeriesData objects are designed to provide easy ways to access,
+ analyze, parallelize and visualize multiple datasets sequentially. This is
+ primarily expressed through iteration, but can also be constructed via
+ analysis tasks (see :ref:`time-series-analysis`).
+
+ The best method to construct TimeSeriesData objects is through
+ :meth:`~yt.data_objects.time_series.TimeSeriesData.from_filenames`.
+
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
self.tasks = AnalysisTaskProxy(self)
self.params = TimeSeriesParametersContainer(self)
self._pre_outputs = outputs[:]
@@ -110,6 +131,65 @@
return len(self._pre_outputs)
def piter(self, storage = None):
+ r"""Iterate over time series components in parallel.
+
+ This allows you to iterate over a time series while dispatching
+ individual components of that time series to different processors or
+ processor groups. If the parallelism strategy was set to be
+ multi-processor (by "parallel = N" where N is an integer when the
+ TimeSeriesData was created) this will issue each dataset to an
+ N-processor group. For instance, this would allow you to start a 1024
+ processor job, loading up 100 datasets in a time series and creating 8
+ processor groups of 128 processors each, each of which would be
+ assigned a different dataset. This could be accomplished as shown in
+ the examples below. The *storage* option is as seen in
+ :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
+ which is a mechanism for storing results of analysis on an individual
+ dataset and then combining the results at the end, so that the entire
+ set of processors have access to those results.
+
+ Note that supplying a *storage* changes the iteration mechanism; see
+ below.
+
+ Parameters
+ ----------
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+
+ Examples
+ --------
+ Here is an example of iteration when the results do not need to be
+ stored. One processor will be assigned to each parameter file.
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> for pf in ts.piter():
+ ... SlicePlot(pf, "x", "Density").save()
+ ...
+
+ This demonstrates how one might store results:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> storage = {}
+ >>> for sto, pf in ts.piter():
+ ... v, c = pf.h.find_max("Density")
+ ... sto.result = (v, c)
+ ...
+ >>> for i, (v, c) in sorted(storage.items()):
+ ... print "% 4i %0.3e" % (i, v)
+ ...
+
+ This shows how to dispatch 4 processors to each dataset:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy",
+ ... parallel = 4)
+ >>> for pf in ts.piter():
+ ... ProjectionPlot(pf, "x", "Density").save()
+ ...
+
+ """
dynamic = False
if self.parallel == False:
njobs = 1
@@ -143,8 +223,44 @@
return [v for k, v in sorted(return_values.items())]
@classmethod
- def from_filenames(cls, filename_list, parallel = True):
- obj = cls(filename_list[:], parallel = parallel)
+ def from_filenames(cls, filenames, parallel = True):
+ r"""Create a time series from either a filename pattern or a list of
+ filenames.
+
+ This method provides an easy way to create a
+ :class:`~yt.data_objects.time_series.TimeSeriesData`, given a set of
+ filenames or a pattern that matches them. Additionally, it can set the
+ parallelism strategy.
+
+ Parameters
+ ----------
+ filenames : list or pattern
+ This can either be a list of filenames (such as ["DD0001/DD0001",
+ "DD0002/DD0002"]) or a pattern to match, such as
+ "DD*/DD*.hierarchy"). If it's the former, they will be loaded in
+ order. The latter will be identified with the glob module and then
+ sorted.
+ parallel : True, False or int
+ This parameter governs the behavior when .piter() is called on the
+ resultant TimeSeriesData object. If this is set to False, the time
+ series will not iterate in parallel when .piter() is called. If
+ this is set to either True or an integer, it will be iterated with
+ 1 or that integer number of processors assigned to each parameter
+ file provided to the loop.
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
+ if isinstance(filenames, types.StringTypes):
+ filenames = glob.glob(filenames)
+ filenames.sort()
+ obj = cls(filenames[:], parallel = parallel)
return obj
@classmethod
diff -r 42fdcc32a605b942ffa55aa04002680c3584d31f -r bdc61310201c3161b59d16efac9194283cfaf53f yt/utilities/parallel_tools/parallel_analysis_interface.py
--- a/yt/utilities/parallel_tools/parallel_analysis_interface.py
+++ b/yt/utilities/parallel_tools/parallel_analysis_interface.py
@@ -344,6 +344,67 @@
def parallel_objects(objects, njobs = 0, storage = None, barrier = True,
dynamic = False):
+ r"""This function dispatches components of an iterable to different
+ processors.
+
+ The parallel_objects function accepts an iterable, *objects*, and based on
+ the number of jobs requested and number of available processors, decides
+ how to dispatch individual objects to processors or sets of processors.
+ This can implicitly include multi-level parallelism, such that the
+ processor groups assigned each object can be composed of several or even
+ hundreds of processors. *storage* is also available, for collation of
+ results at the end of the iteration loop.
+
+ Calls to this function can be nested.
+
+ This should not be used to iterate over parameter files --
+ :class:`~yt.data_objects.time_series.TimeSeriesData` provides a much nicer
+ interface for that.
+
+ Parameters
+ ----------
+ objects : iterable
+ The list of objects to dispatch to different processors.
+ njobs : int
+ How many jobs to spawn. By default, one job will be dispatched for
+ each available processor.
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+ barrier : bool
+ Should a barrier be placed at the end of iteration?
+ dynamic : bool
+ This governs whether or not dynamic load balancing will be enabled.
+ This requires one dedicated processor; if this is enabled with a set of
+ 128 processors available, only 127 will be available to iterate over
+ objects as one will be load balancing the rest.
+
+
+ Examples
+ --------
+ Here is a simple example of iterating over a set of centers and making
+ slice plots centered at each.
+
+ >>> for c in parallel_objects(centers):
+ ... SlicePlot(pf, "x", "Density", center = c).save()
+ ...
+
+ Here's an example of calculating the angular momentum vector of a set of
+ spheres, but with a set of four jobs of multiple processors each. Note
+ that we also store the results.
+
+ >>> storage = {}
+ >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage):
+ ... sp = pf.h.sphere(c, (100, "kpc"))
+ ... sto.result = sp.quantities["AngularMomentumVector"]()
+ ...
+ >>> for sphere_id, L in sorted(storage.items()):
+ ... print c[sphere_id], L
+ ...
+
+ """
if dynamic:
from .task_queue import dynamic_parallel_objects
for my_obj in dynamic_parallel_objects(objects, njobs=njobs,
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the yt-svn
mailing list