[yt-svn] commit/yt: MatthewTurk: Merged in MatthewTurk/yt/yt-3.0 (pull request #949)

Wed Jun 18 17:47:59 PDT 2014

1 new commit in yt:

https://bitbucket.org/yt_analysis/yt/commits/c6e1bd5989e2/
Changeset:   c6e1bd5989e2
Branch:      yt-3.0
User:        MatthewTurk
Date:        2014-06-19 02:47:51
Summary:     Merged in MatthewTurk/yt/yt-3.0 (pull request #949)

Some speedups for particle octree instantiations
Affected #:  5 files

diff -r 1d4b798712da4baa056d633ee73bd6b6b7e89cc1 -r c6e1bd5989e247913d02bb4cb767f55855f9ceb9 yt/frontends/sph/data_structures.py

--- a/yt/frontends/sph/data_structures.py
+++ b/yt/frontends/sph/data_structures.py
@@ -76,6 +76,7 @@
 class ParticleDataset(Dataset):
     _unit_base = None
     over_refine_factor = 1
+    filter_bbox = False
 
 
 class GadgetDataset(ParticleDataset):
@@ -312,8 +313,9 @@
 
         # Set standard values
         self.current_time = hvals["Time_GYR"] * sec_conversion["Gyr"]
-        self.domain_left_edge = np.zeros(3, "float64")
-        self.domain_right_edge = np.ones(3, "float64") * hvals["BoxSize"]
+        if self.domain_left_edge is None:
+            self.domain_left_edge = np.zeros(3, "float64")
+            self.domain_right_edge = np.ones(3, "float64") * hvals["BoxSize"]
         nz = 1 << self.over_refine_factor
         self.domain_dimensions = np.ones(3, "int32") * nz
         self.cosmological_simulation = 1

diff -r 1d4b798712da4baa056d633ee73bd6b6b7e89cc1 -r c6e1bd5989e247913d02bb4cb767f55855f9ceb9 yt/frontends/sph/io.py
--- a/yt/frontends/sph/io.py
+++ b/yt/frontends/sph/io.py
@@ -134,17 +134,13 @@
             dt = ds.dtype.newbyteorder("N") # Native
             pos = np.empty(ds.shape, dtype=dt)
             pos[:] = ds
-            if np.any(pos.min(axis=0) < self.pf.domain_left_edge) or \
-               np.any(pos.max(axis=0) > self.pf.domain_right_edge):
-                raise YTDomainOverflow(pos.min(axis=0),
-                                       pos.max(axis=0),
-                                       self.pf.domain_left_edge,
-                                       self.pf.domain_right_edge)
-            regions.add_data_file(pos, data_file.file_id)
+            regions.add_data_file(pos, data_file.file_id,
+                                  data_file.pf.filter_bbox)
             morton[ind:ind+pos.shape[0]] = compute_morton(
                 pos[:,0], pos[:,1], pos[:,2],
                 data_file.pf.domain_left_edge,
-                data_file.pf.domain_right_edge)
+                data_file.pf.domain_right_edge,
+                data_file.pf.filter_bbox)
             ind += pos.shape[0]
         f.close()
         return morton
@@ -322,14 +318,9 @@
             # The first total_particles * 3 values are positions
             pp = np.fromfile(f, dtype = 'float32', count = count*3)
             pp.shape = (count, 3)
-            if np.any(pp.min(axis=0) < self.pf.domain_left_edge) or \
-               np.any(pp.max(axis=0) > self.pf.domain_right_edge):
-                raise YTDomainOverflow(pp.min(axis=0),
-                                       pp.max(axis=0),
-                                       self.pf.domain_left_edge,
-                                       self.pf.domain_right_edge)
-        regions.add_data_file(pp, data_file.file_id)
-        morton = compute_morton(pp[:,0], pp[:,1], pp[:,2], DLE, DRE)
+        regions.add_data_file(pp, data_file.file_id, data_file.pf.filter_bbox)
+        morton = compute_morton(pp[:,0], pp[:,1], pp[:,2], DLE, DRE,
+                                data_file.pf.filter_bbox)
         return morton
 
     def _count_particles(self, data_file):
@@ -614,21 +605,15 @@
                         mylog.debug("Spanning: %0.3e .. %0.3e in %s", mi, ma, ax)
                         mis[axi] = mi
                         mas[axi] = ma
-                    if np.any(mis < pf.domain_left_edge) or \
-                       np.any(mas > pf.domain_right_edge):
-                        raise YTDomainOverflow(mis, mas,
-                                               pf.domain_left_edge,
-                                               pf.domain_right_edge)
                     pos = np.empty((pp.size, 3), dtype="float64")
                     for i, ax in enumerate("xyz"):
                         eps = np.finfo(pp["Coordinates"][ax].dtype).eps
-                        pos[:,i] = np.clip(pp["Coordinates"][ax],
-                                    self.domain_left_edge[i] + eps,
-                                    self.domain_right_edge[i] - eps)
-                    regions.add_data_file(pos, data_file.file_id)
+                        pos[:,i] = pp["Coordinates"][ax]
+                    regions.add_data_file(pos, data_file.file_id,
+                                          data_file.pf.filter_bbbox)
                     morton[ind:ind+c] = compute_morton(
                         pos[:,0], pos[:,1], pos[:,2],
-                        DLE, DRE)
+                        DLE, DRE, data_file.pf.filter_bbox)
                     ind += c
         mylog.info("Adding %0.3e particles", morton.size)
         return morton
@@ -772,11 +757,13 @@
             s = self._open_stream(data_file, (ptype, "Coordinates"))
             c = np.frombuffer(s, dtype="float64")
             c.shape = (c.shape[0]/3.0, 3)
-            regions.add_data_file(c, data_file.file_id)
+            regions.add_data_file(c, data_file.file_id,
+                                  data_file.pf.filter_bbox)
             morton[ind:ind+c.shape[0]] = compute_morton(
                 c[:,0], c[:,1], c[:,2],
                 data_file.pf.domain_left_edge,
-                data_file.pf.domain_right_edge)
+                data_file.pf.domain_right_edge,
+                data_filter.pf.filter_bbox)
             ind += c.shape[0]
         return morton
 

diff -r 1d4b798712da4baa056d633ee73bd6b6b7e89cc1 -r c6e1bd5989e247913d02bb4cb767f55855f9ceb9 yt/geometry/particle_oct_container.pyx
--- a/yt/geometry/particle_oct_container.pyx
+++ b/yt/geometry/particle_oct_container.pyx
@@ -165,11 +165,16 @@
         cdef int i, level, ind[3]
         if self.root_mesh[0][0][0] == NULL: self.allocate_root()
         cdef np.uint64_t *data = <np.uint64_t *> indices.data
+        cdef np.uint64_t FLAG = ~(<np.uint64_t>0)
         for p in range(no):
             # We have morton indices, which means we choose left and right by
             # looking at (MAX_ORDER - level) & with the values 1, 2, 4.
             level = 0
             index = indices[p]
+            if index == FLAG:
+                # This is a marker for the index not being inside the domain
+                # we're interested in.
+                continue
             for i in range(3):
                 ind[i] = (index >> ((ORDER_MAX - level)*3 + (2 - i))) & 1
             cur = self.root_mesh[ind[0]][ind[1]][ind[2]]
@@ -218,6 +223,7 @@
         # Now we look at the last nref particles to decide where they go.
         cdef int n = imin(p, self.n_ref)
         cdef np.uint64_t *arr = data + imax(p - self.n_ref, 0)
+        cdef np.uint64_t prefix1, prefix2
         # Now we figure out our prefix, which is the oct address at this level.
         # As long as we're actually in Morton order, we do not need to worry
         # about *any* of the other children of the oct.
@@ -263,6 +269,7 @@
 
 cdef class ParticleRegions:
     cdef np.float64_t left_edge[3]
+    cdef np.float64_t right_edge[3]
     cdef np.float64_t dds[3]
     cdef np.float64_t idds[3]
     cdef np.int32_t dims[3]
@@ -274,6 +281,7 @@
         self.nfiles = nfiles
         for i in range(3):
             self.left_edge[i] = left_edge[i]
+            self.right_edge[i] = right_edge[i]
             self.dims[i] = dims[i]
             self.dds[i] = (right_edge[i] - left_edge[i])/dims[i]
             self.idds[i] = 1.0/self.dds[i]
@@ -282,26 +290,36 @@
         for i in range(nfiles/64 + 1):
             self.masks.append(np.zeros(dims, dtype="uint64"))
 
-    def add_data_file(self, np.ndarray pos, int file_id):
+    def add_data_file(self, np.ndarray pos, int file_id, int filter = 0):
         if pos.dtype == np.float32:
-            self._mask_positions[np.float32_t](pos, file_id)
+            self._mask_positions[np.float32_t](pos, file_id, filter)
         elif pos.dtype == np.float64:
-            self._mask_positions[np.float64_t](pos, file_id)
+            self._mask_positions[np.float64_t](pos, file_id, filter)
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
     cdef void _mask_positions(self, np.ndarray[anyfloat, ndim=2] pos,
-                              np.uint64_t file_id):
+                              np.uint64_t file_id, int filter):
         cdef np.int64_t no = pos.shape[0]
         cdef np.int64_t p
-        cdef int ind[3], i
+        cdef int ind[3], i, use
         cdef np.ndarray[np.uint64_t, ndim=3] mask
         mask = self.masks[file_id/64]
         cdef np.uint64_t val = ONEBIT << (file_id - (file_id/64)*64)
         for p in range(no):
             # Now we locate the particle
+            use = 1
             for i in range(3):
+                if filter and (pos[p,i] < self.left_edge[i]
+                            or pos[p,i] > self.right_edge[i]):
+                    use = 0
+                    break
                 ind[i] = <int> ((pos[p, i] - self.left_edge[i])*self.idds[i])
                 ind[i] = iclip(ind[i], 0, self.dims[i])
-            mask[ind[0],ind[1],ind[2]] |= val
+            if use == 1:
+                mask[ind[0],ind[1],ind[2]] |= val
+        return
 
     def identify_data_files(self, SelectorObject selector):
         # This is relatively cheap to iterate over.

diff -r 1d4b798712da4baa056d633ee73bd6b6b7e89cc1 -r c6e1bd5989e247913d02bb4cb767f55855f9ceb9 yt/geometry/selection_routines.pyx
--- a/yt/geometry/selection_routines.pyx
+++ b/yt/geometry/selection_routines.pyx
@@ -654,6 +654,8 @@
             self.right_edge[i] = dobj.right_edge[i]
             self.right_edge_shift[i] = \
                 (dobj.right_edge).to_ndarray()[i] - domain_width.to_ndarray()
+            if not self.periodicity[i]:
+                self.right_edge_shift[i] = -np.inf
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
@@ -1522,6 +1524,9 @@
     cdef np.uint64_t min_ind
     cdef np.uint64_t max_ind
     cdef SelectorObject base_selector
+    cdef int filter_bbox
+    cdef np.float64_t DLE[3]
+    cdef np.float64_t DRE[3]
 
     def __init__(self, dobj):
         self.min_ind = dobj.min_ind
@@ -1529,6 +1534,12 @@
         self.base_selector = dobj.base_selector
         self.min_level = self.base_selector.min_level
         self.max_level = self.base_selector.max_level
+        self.filter_bbox = 0
+        if getattr(dobj.pf, "filter_bbox", False):
+            self.filter_bbox = 1
+        for i in range(3):
+            self.DLE[i] = dobj.pf.domain_left_edge[i]
+            self.DRE[i] = dobj.pf.domain_right_edge[i]
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
@@ -1555,6 +1566,12 @@
     @cython.wraparound(False)
     @cython.cdivision(True)
     cdef int select_point(self, np.float64_t pos[3]) nogil:
+        cdef int i
+        if self.filter_bbox == 0:
+            return 1
+        for i in range(3):
+            if pos[i] < self.DLE[i] or pos[i] > self.DRE[i]:
+                return 0
         return 1
 
     @cython.boundscheck(False)

diff -r 1d4b798712da4baa056d633ee73bd6b6b7e89cc1 -r c6e1bd5989e247913d02bb4cb767f55855f9ceb9 yt/utilities/lib/geometry_utils.pyx
--- a/yt/utilities/lib/geometry_utils.pyx
+++ b/yt/utilities/lib/geometry_utils.pyx
@@ -17,8 +17,9 @@
 cimport numpy as np
 cimport cython
 from libc.stdlib cimport malloc, free
-from fp_utils cimport fclip
+from fp_utils cimport fclip, i64clip
 from libc.math cimport copysign
+from yt.utilities.exceptions import YTDomainOverflow
 
 cdef extern from "math.h":
     double exp(double x) nogil
@@ -348,31 +349,57 @@
     np.float32_t
     np.float64_t
 
-cdef position_to_morton(np.ndarray[anyfloat, ndim=1] pos_x,
+ at cython.cdivision(True)
+ at cython.boundscheck(False)
+ at cython.wraparound(False)
+cdef np.int64_t position_to_morton(np.ndarray[anyfloat, ndim=1] pos_x,
                         np.ndarray[anyfloat, ndim=1] pos_y,
                         np.ndarray[anyfloat, ndim=1] pos_z,
                         np.float64_t dds[3], np.float64_t DLE[3],
-                        np.ndarray[np.uint64_t, ndim=1] ind):
+                        np.float64_t DRE[3],
+                        np.ndarray[np.uint64_t, ndim=1] ind,
+                        int filter):
     cdef np.uint64_t mi, ii[3]
     cdef np.float64_t p[3]
-    cdef np.int64_t i, j
+    cdef np.int64_t i, j, use
+    cdef np.uint64_t DD[3]
+    cdef np.uint64_t FLAG = ~(<np.uint64_t>0)
+    for i in range(3):
+        DD[i] = <np.uint64_t> ((DRE[i] - DLE[i]) / dds[i])
     for i in range(pos_x.shape[0]):
+        use = 1
         p[0] = <np.float64_t> pos_x[i]
         p[1] = <np.float64_t> pos_y[i]
         p[2] = <np.float64_t> pos_z[i]
         for j in range(3):
+            if p[j] < DLE[j] or p[j] > DRE[j]:
+                if filter == 1:
+                    # We only allow 20 levels, so this is inaccessible
+                    use = 0
+                    break
+                return i
             ii[j] = <np.uint64_t> ((p[j] - DLE[j])/dds[j])
+            ii[j] = i64clip(ii[j], 0, DD[j] - 1)
+        if use == 0: 
+            ind[i] = FLAG
+            continue
         mi = 0
         mi |= spread_bits(ii[2])<<0
         mi |= spread_bits(ii[1])<<1
         mi |= spread_bits(ii[0])<<2
         ind[i] = mi
+    return pos_x.shape[0]
 
 DEF ORDER_MAX=20
         
 def compute_morton(np.ndarray pos_x, np.ndarray pos_y, np.ndarray pos_z,
-                   domain_left_edge, domain_right_edge):
+                   domain_left_edge, domain_right_edge, filter_bbox = False):
     cdef int i
+    cdef int filter
+    if filter_bbox:
+        filter = 1
+    else:
+        filter = 0
     cdef np.float64_t dds[3], DLE[3], DRE[3]
     for i in range(3):
         DLE[i] = domain_left_edge[i]
@@ -380,13 +407,23 @@
         dds[i] = (DRE[i] - DLE[i]) / (1 << ORDER_MAX)
     cdef np.ndarray[np.uint64_t, ndim=1] ind
     ind = np.zeros(pos_x.shape[0], dtype="uint64")
+    cdef np.int64_t rv
     if pos_x.dtype == np.float32:
-        position_to_morton[np.float32_t](pos_x, pos_y, pos_z, dds, DLE, ind)
+        rv = position_to_morton[np.float32_t](
+                pos_x, pos_y, pos_z, dds, DLE, DRE, ind,
+                filter)
     elif pos_x.dtype == np.float64:
-        position_to_morton[np.float64_t](pos_x, pos_y, pos_z, dds, DLE, ind)
+        rv = position_to_morton[np.float64_t](
+                pos_x, pos_y, pos_z, dds, DLE, DRE, ind,
+                filter)
     else:
         print "Could not identify dtype.", pos_x.dtype
         raise NotImplementedError
+    if rv < pos_x.shape[0]:
+        mis = (pos_x.min(), pos_y.min(), pos_z.min())
+        mas = (pos_x.max(), pos_y.max(), pos_z.max())
+        raise YTDomainOverflow(mis, mas,
+                               domain_left_edge, domain_right_edge)
     return ind
 
 @cython.boundscheck(False)

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.