[yt-svn] commit/yt: 3 new changesets
Bitbucket
commits-noreply at bitbucket.org
Thu Jul 26 10:52:30 PDT 2012
3 new commits in yt:
https://bitbucket.org/yt_analysis/yt/changeset/a6136e2be34f/
changeset: a6136e2be34f
branch: yt
user: MatthewTurk
date: 2012-07-26 15:45:54
summary: Allow TimeSeriesData.from_filenames to accept a string, in which case it will
glob and sort the string.
affected #: 1 file
diff -r b59774e854fa4233250735434264b55743d630df -r a6136e2be34f4e1a0933b35b39ccef3100740bcd yt/data_objects/time_series.py
--- a/yt/data_objects/time_series.py
+++ b/yt/data_objects/time_series.py
@@ -23,7 +23,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
-import inspect, functools, weakref
+import inspect, functools, weakref, glob, types
from yt.funcs import *
from yt.convenience import load
@@ -144,6 +144,9 @@
@classmethod
def from_filenames(cls, filename_list, parallel = True):
+ if isinstance(filename_list, types.StringTypes):
+ filename_list = glob.glob(filename_list)
+ filename_list.sort()
obj = cls(filename_list[:], parallel = parallel)
return obj
https://bitbucket.org/yt_analysis/yt/changeset/4e073a8c644d/
changeset: 4e073a8c644d
branch: yt
user: MatthewTurk
date: 2012-07-26 16:57:09
summary: Adding docstring to piter and __init__ for TimeSeriesData, and for
parallel_objects.
affected #: 2 files
diff -r a6136e2be34f4e1a0933b35b39ccef3100740bcd -r 4e073a8c644d21fb5c0a770e59fc07d02e57d363 yt/data_objects/time_series.py
--- a/yt/data_objects/time_series.py
+++ b/yt/data_objects/time_series.py
@@ -79,6 +79,27 @@
class TimeSeriesData(object):
def __init__(self, outputs, parallel = True):
+ r"""The TimeSeriesData object is a container of multiple datasets,
+ allowing easy iteration and computation on them.
+
+ TimeSeriesData objects are designed to provide easy ways to access,
+ analyze, parallelize and visualize multiple datasets sequentially. This is
+ primarily expressed through iteration, but can also be constructed via
+ analysis tasks (see :ref:`time-series-analysis`).
+
+ The best method to construct TimeSeriesData objects is through
+ :meth:`~yt.data_objects.time_series.TimeSeriesData.from_filenames`.
+
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
self.tasks = AnalysisTaskProxy(self)
self.params = TimeSeriesParametersContainer(self)
self._pre_outputs = outputs[:]
@@ -110,6 +131,65 @@
return len(self._pre_outputs)
def piter(self, storage = None):
+ r"""Iterate over time series components in parallel.
+
+ This allows you to iterate over a time series while dispatching
+ individual components of that time series to different processors or
+ processor groups. If the parallelism strategy was set to be
+ multi-processor (by "parallel = N" where N is an integer when the
+ TimeSeriesData was created) this will issue each dataset to an
+ N-processor group. For instance, this would allow you to start a 1024
+ processor job, loading up 100 datasets in a time series and creating 8
+ processor groups of 128 processors each, each of which would be
+ assigned a different dataset. This could be accomplished as shown in
+ the examples below. The *storage* option is as seen in
+ :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
+ which is a mechanism for storing results of analysis on an individual
+ dataset and then combining the results at the end, so that the entire
+ set of processors have access to those results.
+
+ Note that supplying a *storage* changes the iteration mechanism; see
+ below.
+
+ Parameters
+ ----------
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+
+ Examples
+ --------
+ Here is an example of iteration when the results do not need to be
+ stored. One processor will be assigned to each parameter file.
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> for pf in ts.piter():
+ ... SlicePlot(pf, "x", "Density").save()
+ ...
+
+ This demonstrates how one might store results:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> storage = {}
+ >>> for sto, pf in ts.piter():
+ ... v, c = pf.h.find_max("Density")
+ ... sto.result = (v, c)
+ ...
+ >>> for i, (v, c) in sorted(storage.items()):
+ ... print "% 4i %0.3e" % (i, v)
+ ...
+
+ This shows how to dispatch 4 processors to each dataset:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy",
+ ... parallel = 4)
+ >>> for pf in ts.piter():
+ ... ProjectionPlot(pf, "x", "Density").save()
+ ...
+
+ """
dynamic = False
if self.parallel == False:
njobs = 1
@@ -143,11 +223,44 @@
return [v for k, v in sorted(return_values.items())]
@classmethod
- def from_filenames(cls, filename_list, parallel = True):
- if isinstance(filename_list, types.StringTypes):
- filename_list = glob.glob(filename_list)
- filename_list.sort()
- obj = cls(filename_list[:], parallel = parallel)
+ def from_filenames(cls, filenames, parallel = True):
+ r"""Create a time series from either a filename pattern or a list of
+ filenames.
+
+ This method provides an easy way to create a
+ :class:`~yt.data_objects.time_series.TimeSeriesData`, given a set of
+ filenames or a pattern that matches them. Additionally, it can set the
+ parallelism strategy.
+
+ Parameters
+ ----------
+ filenames : list or pattern
+ This can either be a list of filenames (such as ["DD0001/DD0001",
+ "DD0002/DD0002"]) or a pattern to match, such as
+ "DD*/DD*.hierarchy"). If it's the former, they will be loaded in
+ order. The latter will be identified with the glob module and then
+ sorted.
+ parallel : True, False or int
+ This parameter governs the behavior when .piter() is called on the
+ resultant TimeSeriesData object. If this is set to False, the time
+ series will not iterate in parallel when .piter() is called. If
+ this is set to either True or an integer, it will be iterated with
+ 1 or that integer number of processors assigned to each parameter
+ file provided to the loop.
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
+ if isinstance(filenames, types.StringTypes):
+ filenames = glob.glob(filenames)
+ filenames.sort()
+ obj = cls(filenames[:], parallel = parallel)
return obj
@classmethod
diff -r a6136e2be34f4e1a0933b35b39ccef3100740bcd -r 4e073a8c644d21fb5c0a770e59fc07d02e57d363 yt/utilities/parallel_tools/parallel_analysis_interface.py
--- a/yt/utilities/parallel_tools/parallel_analysis_interface.py
+++ b/yt/utilities/parallel_tools/parallel_analysis_interface.py
@@ -344,6 +344,67 @@
def parallel_objects(objects, njobs = 0, storage = None, barrier = True,
dynamic = False):
+ r"""This function dispatches components of an iterable to different
+ processors.
+
+ The parallel_objects function accepts an iterable, *objects*, and based on
+ the number of jobs requested and number of available processors, decides
+ how to dispatch individual objects to processors or sets of processors.
+ This can implicitly include multi-level parallelism, such that the
+ processor groups assigned each object can be composed of several or even
+ hundreds of processors. *storage* is also available, for collation of
+ results at the end of the iteration loop.
+
+ Calls to this function can be nested.
+
+ This should not be used to iterate over parameter files --
+ :class:`~yt.data_objects.time_series.TimeSeriesData` provides a much nicer
+ interface for that.
+
+ Parameters
+ ----------
+ objects : iterable
+ The list of objects to dispatch to different processors.
+ njobs : int
+ How many jobs to spawn. By default, one job will be dispatched for
+ each available processor.
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+ barrier : bool
+ Should a barrier be placed at the end of iteration?
+ dynamic : bool
+ This governs whether or not dynamic load balancing will be enabled.
+ This requires one dedicated processor; if this is enabled with a set of
+ 128 processors available, only 127 will be available to iterate over
+ objects as one will be load balancing the rest.
+
+
+ Examples
+ --------
+ Here is a simple example of iterating over a set of centers and making
+ slice plots centered at each.
+
+ >>> for c in parallel_objects(centers):
+ ... SlicePlot(pf, "x", "Density", center = c).save()
+ ...
+
+ Here's an example of calculating the angular momentum vector of a set of
+ spheres, but with a set of four jobs of multiple processors each. Note
+ that we also store the results.
+
+ >>> storage = {}
+ >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage):
+ ... sp = pf.h.sphere(c, (100, "kpc"))
+ ... sto.result = sp.quantities["AngularMomentumVector"]()
+ ...
+ >>> for sphere_id, L in sorted(storage.items()):
+ ... print c[sphere_id], L
+ ...
+
+ """
if dynamic:
from .task_queue import dynamic_parallel_objects
for my_obj in dynamic_parallel_objects(objects, njobs=njobs,
https://bitbucket.org/yt_analysis/yt/changeset/bdc61310201c/
changeset: bdc61310201c
branch: yt
user: jsoishi
date: 2012-07-26 19:52:28
summary: Merged in MatthewTurk/yt (pull request #218)
affected #: 2 files
diff -r 42fdcc32a605b942ffa55aa04002680c3584d31f -r bdc61310201c3161b59d16efac9194283cfaf53f yt/data_objects/time_series.py
--- a/yt/data_objects/time_series.py
+++ b/yt/data_objects/time_series.py
@@ -23,7 +23,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
-import inspect, functools, weakref
+import inspect, functools, weakref, glob, types
from yt.funcs import *
from yt.convenience import load
@@ -79,6 +79,27 @@
class TimeSeriesData(object):
def __init__(self, outputs, parallel = True):
+ r"""The TimeSeriesData object is a container of multiple datasets,
+ allowing easy iteration and computation on them.
+
+ TimeSeriesData objects are designed to provide easy ways to access,
+ analyze, parallelize and visualize multiple datasets sequentially. This is
+ primarily expressed through iteration, but can also be constructed via
+ analysis tasks (see :ref:`time-series-analysis`).
+
+ The best method to construct TimeSeriesData objects is through
+ :meth:`~yt.data_objects.time_series.TimeSeriesData.from_filenames`.
+
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
self.tasks = AnalysisTaskProxy(self)
self.params = TimeSeriesParametersContainer(self)
self._pre_outputs = outputs[:]
@@ -110,6 +131,65 @@
return len(self._pre_outputs)
def piter(self, storage = None):
+ r"""Iterate over time series components in parallel.
+
+ This allows you to iterate over a time series while dispatching
+ individual components of that time series to different processors or
+ processor groups. If the parallelism strategy was set to be
+ multi-processor (by "parallel = N" where N is an integer when the
+ TimeSeriesData was created) this will issue each dataset to an
+ N-processor group. For instance, this would allow you to start a 1024
+ processor job, loading up 100 datasets in a time series and creating 8
+ processor groups of 128 processors each, each of which would be
+ assigned a different dataset. This could be accomplished as shown in
+ the examples below. The *storage* option is as seen in
+ :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
+ which is a mechanism for storing results of analysis on an individual
+ dataset and then combining the results at the end, so that the entire
+ set of processors have access to those results.
+
+ Note that supplying a *storage* changes the iteration mechanism; see
+ below.
+
+ Parameters
+ ----------
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+
+ Examples
+ --------
+ Here is an example of iteration when the results do not need to be
+ stored. One processor will be assigned to each parameter file.
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> for pf in ts.piter():
+ ... SlicePlot(pf, "x", "Density").save()
+ ...
+
+ This demonstrates how one might store results:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy")
+ >>> storage = {}
+ >>> for sto, pf in ts.piter():
+ ... v, c = pf.h.find_max("Density")
+ ... sto.result = (v, c)
+ ...
+ >>> for i, (v, c) in sorted(storage.items()):
+ ... print "% 4i %0.3e" % (i, v)
+ ...
+
+ This shows how to dispatch 4 processors to each dataset:
+
+ >>> ts = TimeSeriesData.from_filenames("DD*/DD*.hierarchy",
+ ... parallel = 4)
+ >>> for pf in ts.piter():
+ ... ProjectionPlot(pf, "x", "Density").save()
+ ...
+
+ """
dynamic = False
if self.parallel == False:
njobs = 1
@@ -143,8 +223,44 @@
return [v for k, v in sorted(return_values.items())]
@classmethod
- def from_filenames(cls, filename_list, parallel = True):
- obj = cls(filename_list[:], parallel = parallel)
+ def from_filenames(cls, filenames, parallel = True):
+ r"""Create a time series from either a filename pattern or a list of
+ filenames.
+
+ This method provides an easy way to create a
+ :class:`~yt.data_objects.time_series.TimeSeriesData`, given a set of
+ filenames or a pattern that matches them. Additionally, it can set the
+ parallelism strategy.
+
+ Parameters
+ ----------
+ filenames : list or pattern
+ This can either be a list of filenames (such as ["DD0001/DD0001",
+ "DD0002/DD0002"]) or a pattern to match, such as
+ "DD*/DD*.hierarchy"). If it's the former, they will be loaded in
+ order. The latter will be identified with the glob module and then
+ sorted.
+ parallel : True, False or int
+ This parameter governs the behavior when .piter() is called on the
+ resultant TimeSeriesData object. If this is set to False, the time
+ series will not iterate in parallel when .piter() is called. If
+ this is set to either True or an integer, it will be iterated with
+ 1 or that integer number of processors assigned to each parameter
+ file provided to the loop.
+
+ Examples
+ --------
+
+ >>> ts = TimeSeriesData.from_filenames(
+ "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0")
+ >>> for pf in ts:
+ ... SlicePlot(pf, "x", "Density").save()
+
+ """
+ if isinstance(filenames, types.StringTypes):
+ filenames = glob.glob(filenames)
+ filenames.sort()
+ obj = cls(filenames[:], parallel = parallel)
return obj
@classmethod
diff -r 42fdcc32a605b942ffa55aa04002680c3584d31f -r bdc61310201c3161b59d16efac9194283cfaf53f yt/utilities/parallel_tools/parallel_analysis_interface.py
--- a/yt/utilities/parallel_tools/parallel_analysis_interface.py
+++ b/yt/utilities/parallel_tools/parallel_analysis_interface.py
@@ -344,6 +344,67 @@
def parallel_objects(objects, njobs = 0, storage = None, barrier = True,
dynamic = False):
+ r"""This function dispatches components of an iterable to different
+ processors.
+
+ The parallel_objects function accepts an iterable, *objects*, and based on
+ the number of jobs requested and number of available processors, decides
+ how to dispatch individual objects to processors or sets of processors.
+ This can implicitly include multi-level parallelism, such that the
+ processor groups assigned each object can be composed of several or even
+ hundreds of processors. *storage* is also available, for collation of
+ results at the end of the iteration loop.
+
+ Calls to this function can be nested.
+
+ This should not be used to iterate over parameter files --
+ :class:`~yt.data_objects.time_series.TimeSeriesData` provides a much nicer
+ interface for that.
+
+ Parameters
+ ----------
+ objects : iterable
+ The list of objects to dispatch to different processors.
+ njobs : int
+ How many jobs to spawn. By default, one job will be dispatched for
+ each available processor.
+ storage : dict
+ This is a dictionary, which will be filled with results during the
+ course of the iteration. The keys will be the parameter file
+ indices and the values will be whatever is assigned to the *result*
+ attribute on the storage during iteration.
+ barrier : bool
+ Should a barrier be placed at the end of iteration?
+ dynamic : bool
+ This governs whether or not dynamic load balancing will be enabled.
+ This requires one dedicated processor; if this is enabled with a set of
+ 128 processors available, only 127 will be available to iterate over
+ objects as one will be load balancing the rest.
+
+
+ Examples
+ --------
+ Here is a simple example of iterating over a set of centers and making
+ slice plots centered at each.
+
+ >>> for c in parallel_objects(centers):
+ ... SlicePlot(pf, "x", "Density", center = c).save()
+ ...
+
+ Here's an example of calculating the angular momentum vector of a set of
+ spheres, but with a set of four jobs of multiple processors each. Note
+ that we also store the results.
+
+ >>> storage = {}
+ >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage):
+ ... sp = pf.h.sphere(c, (100, "kpc"))
+ ... sto.result = sp.quantities["AngularMomentumVector"]()
+ ...
+ >>> for sphere_id, L in sorted(storage.items()):
+ ... print c[sphere_id], L
+ ...
+
+ """
if dynamic:
from .task_queue import dynamic_parallel_objects
for my_obj in dynamic_parallel_objects(objects, njobs=njobs,
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the yt-svn
mailing list