[Yt-svn] yt-commit r636 - trunk/yt/lagos
mturk at wrangler.dreamhost.com
Sun Jun 29 08:49:00 PDT 2008
Author: mturk
Date: Sun Jun 29 08:48:58 2008
New Revision: 636
URL: http://yt.spacepope.org/changeset/636
Log:
Added a bunch of comments to the profiler.
Modified:
trunk/yt/lagos/Profiles.py
Modified: trunk/yt/lagos/Profiles.py
==============================================================================
--- trunk/yt/lagos/Profiles.py (original)
+++ trunk/yt/lagos/Profiles.py Sun Jun 29 08:48:58 2008
@@ -27,6 +27,8 @@
def preserve_source_parameters(func):
def save_state(*args, **kwargs):
+ # Temporarily replace the 'field_parameters' for a
+ # grid with the 'field_parameters' for the data source
prof = args[0]
source = args[1]
if hasattr(source, 'field_parameters'):
@@ -48,8 +50,8 @@
self._lazy_reader = lazy_reader
def _lazy_add_fields(self, fields, weight, accumulation):
- data = {}
- weight_data = {}
+ data = {} # final results will go here
+ weight_data = {} # we need to track the weights as we go
for field in fields:
data[field] = self._get_empty_field()
weight_data[field] = self._get_empty_field()
@@ -58,19 +60,20 @@
for gi,grid in enumerate(self._data_source._grids):
pbar.update(gi)
args = self._get_bins(grid, check_cut=True)
- if not args:
+ if not args: # No bins returned for this grid, so forget it!
continue
for field in fields:
+ # We get back field values, weight values, used bins
f, w, u = self._bin_field(grid, field, weight, accumulation,
args=args, check_cut=True)
- data[field] += f
- weight_data[field] += w
- used = (used | u)
+ data[field] += f # running total
+ weight_data[field] += w # running total
+ used = (used | u) # running 'or'
grid.clear_data()
pbar.finish()
ub = na.where(used)
for field in fields:
- if weight:
+ if weight: # Now, at the end, we divide out.
data[field][ub] /= weight_data[field][ub]
self[field] = data[field]
self["UsedBins"] = used
@@ -115,6 +118,11 @@
lower_bound, upper_bound,
log_space = True, lazy_reader=False):
"""
+ A 'Profile' produces either a weighted (or unweighted) average or a
+ straight sum of a field in a bin defined by another field. In the case
+ of a weighted average, we have:
+ p_i = sum( w_i * v_i ) / sum(w_i)
+
We accept a *data_source*, which will be binned into *n_bins* by the
field *bin_field* between the *lower_bound* and the *upper_bound*.
These bins may or may not be equally divided in *log_space*, and the
@@ -124,6 +132,7 @@
BinnedProfile.__init__(self, data_source, lazy_reader)
self.bin_field = bin_field
self._x_log = log_space
+ # Get our bins
if log_space:
func = na.logspace
lower_bound, upper_bound = na.log10(lower_bound), na.log10(upper_bound)
@@ -131,6 +140,8 @@
func = na.linspace
self[bin_field] = func(lower_bound, upper_bound, n_bins)
+ # If we are not being memory-conservative, grab all the bins
+ # and the inverse indices right now.
if not lazy_reader:
self._args = self._get_bins(data_source)
@@ -140,8 +151,10 @@
@preserve_source_parameters
def _bin_field(self, source, field, weight, accumulation,
args, check_cut=False):
- mi, inv_bin_indices = args
- if check_cut:
+ mi, inv_bin_indices = args # Args has the indices to use as input
+ # check_cut is set if source != self._data_source
+ # (i.e., lazy_reader)
+ if check_cut: # Only use the points inside the source
cm = self._data_source._get_point_indices(source)
source_data = source[field][cm].astype('float64')[mi]
if weight: weight_data = source[weight][cm].astype('float64')[mi]
@@ -151,34 +164,44 @@
binned_field = self._get_empty_field()
weight_field = self._get_empty_field()
used_field = na.ones(weight_field.shape, dtype='bool')
+ # Now we perform the actual binning
for bin in inv_bin_indices.keys():
+ # temp_field is *all* the points from source that go into this bin
temp_field = source_data[inv_bin_indices[bin]]
if weight:
+ # now w_i * v_i and store sum(w_i)
weight_field[bin] = weight_data[inv_bin_indices[bin]].sum()
temp_field *= weight_data[inv_bin_indices[bin]]
binned_field[bin] = temp_field.sum()
- if accumulation: # Fix for laziness
+ # Fix for laziness, because at the *end* we will be
+ # summing up all of the histograms and dividing by the
+ # weights. Accumulation likely doesn't work with weighted
+ # average fields.
+ if accumulation:
binned_field = na.add.accumulate(binned_field)
return binned_field, weight_field, na.ones(binned_field.shape,dtype='bool')
@preserve_source_parameters
def _get_bins(self, source, check_cut=False):
- if check_cut:
+ if check_cut: # if source != self._data_source
cm = self._data_source._get_point_indices(source)
source_data = source[self.bin_field][cm]
else:
source_data = source[self.bin_field]
- if source_data.size == 0:
+ if source_data.size == 0: # Nothing for us here.
return
+ # Truncate at boundaries.
mi = na.where( (source_data > self[self.bin_field].min())
& (source_data < self[self.bin_field].max()))
sd = source_data[mi]
if sd.size == 0:
return
+ # Stick the bins into our fixed bins, set at initialization
bin_indices = na.digitize(sd, self[self.bin_field])
# Now we set up our inverse bin indices
inv_bin_indices = {}
for bin in range(self[self.bin_field].size):
+ # Which fall into our bin?
inv_bin_indices[bin] = na.where(bin_indices == bin)
return (mi, inv_bin_indices)
@@ -198,6 +221,11 @@
y_n_bins, y_bin_field, y_lower_bound, y_upper_bound, y_log,
lazy_reader=False):
"""
+ A 'Profile' produces either a weighted (or unweighted) average or a
+ straight sum of a field in a bin defined by two other fields. In the case
+ of a weighted average, we have:
+ p_i = sum( w_i * v_i ) / sum(w_i)
+
We accept a *data_source*, which will be binned into *x_n_bins* by the
field *x_bin_field* between the *x_lower_bound* and the *x_upper_bound*
and then again binned into *y_n_bins* by the field *y_bin_field*
More information about the yt-svn mailing list