[Yt-svn] yt-commit r1089 - trunk/yt/lagos
mturk at wrangler.dreamhost.com
mturk at wrangler.dreamhost.com
Wed Jan 7 21:38:54 PST 2009
Author: mturk
Date: Wed Jan 7 21:38:53 2009
New Revision: 1089
URL: http://yt.spacepope.org/changeset/1089
Log:
Major improvements to clump finder.
Field caching is now available, but off by default. This is the cached_fields
argument to the initializer of a Clump -- you have to feed this in yourself.
Something like:
defaultdict(lambda:dict())
will do it. (When you finish with the clump finding, you should call .clear()
on this object!)
Additionally, I rewrote the queueing system for the contouring; this sped
things up (in my tests) by about a factor of six. I should have run a profiler
on this code long ago.
Fixed the CUDA IsBound to check for small numbers of cells -- if we don't have
lots of cells, we just dump back to the C code. This helps with cases where
you pick out small regions of cells, like in high-Jeans runs.
The change to CoveringGrid where I commented out the field assignment of
'dx','dy' and 'dz' might be problematic, but it looks valid to me and the tests
all pass.
Fixed some potential issues with empty grid cell counts in extracted regions.
All in all, I am quite pleased with the clump finder now. It's substantially
faster and the accuracy has been retained.
I'll probably convert the format string'd cuda.SourceModule to have a parameter
passed in, but for now it's not slowing us down much.
Modified:
trunk/yt/lagos/BaseDataTypes.py
trunk/yt/lagos/Clump.py
trunk/yt/lagos/ContourFinder.py
trunk/yt/lagos/DerivedQuantities.py
Modified: trunk/yt/lagos/BaseDataTypes.py
==============================================================================
--- trunk/yt/lagos/BaseDataTypes.py (original)
+++ trunk/yt/lagos/BaseDataTypes.py Wed Jan 7 21:38:53 2009
@@ -360,7 +360,7 @@
fields_to_get = self.fields
else:
fields_to_get = ensure_list(fields)
- mylog.debug("Going to obtain %s (%s)", fields_to_get, self.fields)
+ mylog.debug("Going to obtain %s", fields_to_get, self.fields)
for field in fields_to_get:
if self.data.has_key(field):
continue
@@ -1445,7 +1445,7 @@
grid_vals, xi, yi, zi = [], [], [], []
for grid in self._base_region._grids:
xit,yit,zit = self._base_region._get_point_indices(grid)
- grid_vals.append(na.ones(xit.shape, dtype='int64') * grid.id-grid._id_offset)
+ grid_vals.append(na.ones(xit.shape, dtype='int') * (grid.id-grid._id_offset))
xi.append(xit)
yi.append(yit)
zi.append(zit)
@@ -1456,7 +1456,7 @@
xi = na.concatenate(xi)[self._base_indices][grid_order]
yi = na.concatenate(yi)[self._base_indices][grid_order]
zi = na.concatenate(zi)[self._base_indices][grid_order]
- bc = na.bincount(grid_vals.astype("int32"))
+ bc = na.bincount(grid_vals)
splits = []
for i,v in enumerate(bc):
if v > 0: splits.append(v)
@@ -1465,17 +1465,16 @@
self._indices = {}
for grid_id, x, y, z in zip(grid_ids, xis, yis, zis):
# grid_id needs no offset
- self._indices[grid_id] = (x.astype('int64'),
- y.astype('int64'),
- z.astype('int64'))
+ self._indices[grid_id] = (x, y, z)
self._grids = self._base_region.pf.h.grids[self._indices.keys()]
def _is_fully_enclosed(self, grid):
- return (self._indices[grid.id-1][0].size == grid.ActiveDimensions.prod())
+ return (self._indices[grid.id-grid._id_offset][0].size == grid.ActiveDimensions.prod())
+ __empty_array = na.array([], dtype='bool')
def _get_point_indices(self, grid, use_child_mask=True):
# Yeah, if it's not true, we don't care.
- return self._indices.get(grid.id-grid._id_offset, ())
+ return self._indices.get(grid.id-grid._id_offset, self.__empty_array)
class InLineExtractedRegionBase(AMR3DData):
"""
@@ -1767,6 +1766,7 @@
self._refresh_data()
def _get_list_of_grids(self):
+ if self._grids is not None: return
if na.any(self.left_edge < self.pf["DomainLeftEdge"]) or \
na.any(self.right_edge > self.pf["DomainRightEdge"]):
grids,ind = self.pf.hierarchy.get_periodic_box_grids(
@@ -1784,9 +1784,9 @@
def _refresh_data(self):
AMR3DData._refresh_data(self)
- self['dx'] = self.dx * na.ones(self.ActiveDimensions, dtype='float64')
- self['dy'] = self.dy * na.ones(self.ActiveDimensions, dtype='float64')
- self['dz'] = self.dz * na.ones(self.ActiveDimensions, dtype='float64')
+ #self['dx'] = self.dx * na.ones(self.ActiveDimensions, dtype='float64')
+ #self['dy'] = self.dy * na.ones(self.ActiveDimensions, dtype='float64')
+ #self['dz'] = self.dz * na.ones(self.ActiveDimensions, dtype='float64')
def get_data(self, field=None):
self._get_list_of_grids()
@@ -1828,11 +1828,8 @@
fields_to_get = self.fields
else:
fields_to_get = ensure_list(field)
- for field in fields_to_get:
- #mylog.debug("Flushing field %s to %s possible grids",
- #field, len(self._grids))
- for grid in self._grids:
- self._flush_data_to_grid(grid, field)
+ for grid in self._grids:
+ self._flush_data_to_grid(grid, fields_to_get)
@restore_grid_state
def _get_data_from_grid(self, grid, fields):
Modified: trunk/yt/lagos/Clump.py
==============================================================================
--- trunk/yt/lagos/Clump.py (original)
+++ trunk/yt/lagos/Clump.py Wed Jan 7 21:38:53 2009
@@ -28,23 +28,26 @@
class Clump(object):
children = None
- def __init__(self, data, parent, field):
+ def __init__(self, data, parent, field, cached_fields = None):
self.parent = parent
self.data = data
self.field = field
self.min = self.data[field].min()
self.max = self.data[field].max()
self.isBound = None
+ self.cached_fields = cached_fields
def find_children(self, min, max = None):
if self.children is not None:
print "Wiping out existing children clumps."
self.children = []
if max is None: max = self.max
- contour_info = identify_contours(self.data, self.field, min, max)
+ contour_info = identify_contours(self.data, self.field, min, max,
+ self.cached_fields)
for cid in contour_info:
new_clump = self.data.extract_region(contour_info[cid])
- self.children.append(Clump(new_clump, self, self.field))
+ self.children.append(Clump(new_clump, self, self.field,
+ self.cached_fields))
def get_IsBound(self):
if self.isBound is None:
Modified: trunk/yt/lagos/ContourFinder.py
==============================================================================
--- trunk/yt/lagos/ContourFinder.py (original)
+++ trunk/yt/lagos/ContourFinder.py Wed Jan 7 21:38:53 2009
@@ -26,7 +26,7 @@
from yt.lagos import *
class GridConsiderationQueue:
- def __init__(self, white_list = None, priority_func=None):
+ def __init__(self, white_list, priority_func=None):
"""
This class exists to serve the contour finder. It ensures that
we can create a cascading set of queue dependencies, and if
@@ -34,37 +34,25 @@
of the queue again. It like has few uses.
"""
self.to_consider = []
- self.considered = []
+ self.considered = set()
self.n = 0
- if white_list != None:
- self.white_list = white_list
- else:
- self.white_list = []
+ self.white_list = set(white_list)
self.priority_func = priority_func
def add(self, grids, force=False):
- if hasattr(grids,'size'):
- grids = grids.tolist()
- else:
+ if not hasattr(grids,'size'):
grids = ensure_list(grids)
- if self.priority_func:
- grids.sort(key=self.priority_func)
- else:
- grids.sort()
- if force:
- for g in grids:
- for i in range(self.considered.count(g)):
- del self.considered[self.considered.index(g)]
i = self.n
- for g in grids:
- if g not in self.white_list: continue
- if g in self.considered: continue
- if g in self.to_consider[i:]:
- del self.to_consider[i+self.to_consider[i:].index(g)]
+ to_check = self.white_list.intersection(grids)
+ if not force: to_check.difference_update(self.considered)
+ for g in sorted(to_check, key=self.priority_func):
+ try:
+ # We only delete from subsequent checks
+ del self.to_consider[self.to_consider.index(g, i)]
self.to_consider.insert(i,g)
i += 1
- continue
- self.to_consider.append(g)
+ except ValueError:
+ self.to_consider.append(g)
def __iter__(self):
return self
@@ -73,7 +61,7 @@
if self.n >= len(self.to_consider):
raise StopIteration
tr = self.to_consider[self.n]
- self.considered.append(tr)
+ self.considered.add(tr)
self.n += 1
return tr
@@ -101,7 +89,7 @@
mylog.info("Contouring over %s cells with %s candidates", contour_ids[0],np)
data_source["tempContours"][contour_ind] = contour_ids[:]
data_source._flush_data_to_grids("tempContours", -1, dtype='int32')
- my_queue = GridConsiderationQueue(data_source._grids,
+ my_queue = GridConsiderationQueue(white_list = data_source._grids,
priority_func = lambda g: -1*g["tempContours"].max())
my_queue.add(data_source._grids)
for i,grid in enumerate(my_queue):
@@ -129,18 +117,21 @@
cur_max_id -= local_ind[0].size
fd = cg["tempContours"].astype('int64')
fd_original = fd.copy()
- xi_u,yi_u,zi_u = na.where(fd > -1)
- cor_order = na.argsort(-1*fd[(xi_u,yi_u,zi_u)])
- xi = xi_u[cor_order]
- yi = yi_u[cor_order]
- zi = zi_u[cor_order]
- PointCombine.FindContours(fd, xi, yi, zi)
+ if na.all(fd > -1):
+ fd[:] = fd.max()
+ else:
+ xi_u,yi_u,zi_u = na.where(fd > -1)
+ cor_order = na.argsort(-1*fd[(xi_u,yi_u,zi_u)])
+ xi = xi_u[cor_order]
+ yi = yi_u[cor_order]
+ zi = zi_u[cor_order]
+ PointCombine.FindContours(fd, xi, yi, zi)
cg["tempContours"] = fd.copy().astype('float64')
cg.flush_data("tempContours")
my_queue.add(cg._grids)
force_ind = na.unique(cg["GridIndices"][na.where(
(cg["tempContours"] > fd_original)
- & (cg["GridIndices"] != grid.id-1) )])
+ & (cg["GridIndices"] != grid.id-grid._id_offset) )])
if len(force_ind) > 0:
my_queue.add(data_source.hierarchy.grids[force_ind.astype('int32')], force=True)
for ax in 'xyz':
Modified: trunk/yt/lagos/DerivedQuantities.py
==============================================================================
--- trunk/yt/lagos/DerivedQuantities.py (original)
+++ trunk/yt/lagos/DerivedQuantities.py Wed Jan 7 21:38:53 2009
@@ -27,6 +27,9 @@
from yt.lagos import *
from yt.funcs import get_pbar, wraps
+import math
+
+__CUDA_BLOCK_SIZE = 256
quantity_info = {}
@@ -215,6 +218,8 @@
"""
# Kinetic energy
bv_x,bv_y,bv_z = data.quantities["BulkVelocity"]()
+ # One-cell objects are NOT BOUND.
+ if data["CellMass"].size == 1: return [0.0]
kinetic = 0.5 * (data["CellMass"] * (
(data["x-velocity"] - bv_x)**2
+ (data["y-velocity"] - bv_y)**2
@@ -234,7 +239,7 @@
except (ImportError, AssertionError):
pot = 2*G*PointCombine.FindBindingEnergy(data["CellMass"],
data['x'],data['y'],data['z'],
- False, kinetic/(2*G))
+ truncate, kinetic/(2*G))
mylog.info("Boundedness check took %0.3e seconds", time.time()-t1)
return [(pot / kinetic)]
def _combIsBound(data, bound):
@@ -243,6 +248,7 @@
force_unlazy=True)
def _cudaIsBound(data, truncate, ratio):
+ bsize = __CUDA_BLOCK_SIZE
import pycuda.driver as cuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
@@ -250,14 +256,20 @@
cuda.init()
assert cuda.Device.count() >= 1
+ mass_scale_factor = 1.0/(data['CellMass'].max())
+ m = (data['CellMass'] * mass_scale_factor).astype('float32')
+ assert(m.size > bsize)
+
+ gsize=int(math.ceil(float(m.size)/bsize))
+ assert(gsize > 16)
+
# Now the tedious process of rescaling our values...
length_scale_factor = data['dx'].max()/data['dx'].min()
- mass_scale_factor = 1.0/(data['CellMass'].max())
x = ((data['x'] - data['x'].min()) * length_scale_factor).astype('float32')
y = ((data['y'] - data['y'].min()) * length_scale_factor).astype('float32')
z = ((data['z'] - data['z'].min()) * length_scale_factor).astype('float32')
- m = (data['CellMass'] * mass_scale_factor).astype('float32')
p = na.zeros(z.shape, dtype='float32')
+
x_gpu = cuda.mem_alloc(x.size * x.dtype.itemsize)
y_gpu = cuda.mem_alloc(y.size * y.dtype.itemsize)
z_gpu = cuda.mem_alloc(z.size * z.dtype.itemsize)
@@ -270,7 +282,7 @@
extern __shared__ float array[];
__global__ void isbound(float *x, float *y, float *z, float *m,
- float *p)
+ float *p, int *nelem)
{
/* My index in the array */
@@ -305,23 +317,22 @@
float my_p = 0.0;
- for (int i = 0; i < blockDim.x; i++){
- if(i + idx2 < idx1 + 1) continue;
- tx = (x_data1[offset]-x_data2[i]);
- ty = (y_data1[offset]-y_data2[i]);
- tz = (z_data1[offset]-z_data2[i]);
- my_p += m_data1[offset]*m_data2[i] /
- sqrt(tx*tx+ty*ty+tz*tz);
+ if(idx1 < %(p)s) {
+ for (int i = 0; i < blockDim.x; i++){
+ if(i + idx2 < idx1 + 1) continue;
+ tx = (x_data1[offset]-x_data2[i]);
+ ty = (y_data1[offset]-y_data2[i]);
+ tz = (z_data1[offset]-z_data2[i]);
+ my_p += m_data1[offset]*m_data2[i] /
+ sqrt(tx*tx+ty*ty+tz*tz);
+ }
}
p[idx1] += my_p;
__syncthreads();
}
"""
- bsize = 256
- mod = cuda.SourceModule(source % dict(p=m.size, b=bsize), keep=True)
+ mod = cuda.SourceModule(source % dict(p=m.size))
func = mod.get_function('isbound')
- import math
- gsize=int(math.ceil(float(m.size)/bsize))
mylog.info("Running CUDA functions. May take a while. (%0.5e, %s)",
x.size, gsize)
import pycuda.tools as ct
@@ -331,6 +342,7 @@
block=(bsize,1,1), grid=(gsize, gsize), time_kernel=True)
cuda.memcpy_dtoh(p, p_gpu)
p1 = p.sum()
+ if na.any(na.isnan(p)): raise ValueError
return p1 * (length_scale_factor / (mass_scale_factor**2.0))
def _Extrema(data, fields):
More information about the yt-svn
mailing list