[Yt-svn] commit/yt: 10 new changesets
Bitbucket
commits-noreply at bitbucket.org
Thu Nov 3 06:01:48 PDT 2011
10 new commits in yt:
https://bitbucket.org/yt_analysis/yt/changeset/1e5955fdd14d/
changeset: 1e5955fdd14d
branch: yt
user: sskory
date: 2011-10-28 00:40:12
summary: Adding a modified scipy.spatial back into yt and parallelHOP.
It can be used with 'tree = "C"' when calling parallelHF.
It is much slower, but hopefully uses less memory (or can be made to)
than the Fortran kdtree.
affected #: 15 files
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/analysis_modules/halo_finding/halo_objects.py
--- a/yt/analysis_modules/halo_finding/halo_objects.py
+++ b/yt/analysis_modules/halo_finding/halo_objects.py
@@ -1376,7 +1376,8 @@
["ParticleMassMsun", "particle_index"]
def __init__(self, data_source, padding, num_neighbors, bounds, total_mass,
- period, threshold=160.0, dm_only=True, rearrange=True, premerge=True):
+ period, threshold=160.0, dm_only=True, rearrange=True, premerge=True,
+ tree = 'F'):
"""
Run hop on *data_source* with a given density *threshold*. If
*dm_only* is set, only run it on the dark matter particles, otherwise
@@ -1393,6 +1394,7 @@
self.period = na.array([1.]*3)
self._data_source = data_source
self.premerge = premerge
+ self.tree = tree
mylog.info("Initializing HOP")
HaloList.__init__(self, data_source, dm_only)
@@ -1421,7 +1423,8 @@
obj = ParallelHOPHaloFinder(self.period, self.padding,
self.num_neighbors, self.bounds,
self.particle_fields,
- self.threshold, rearrange=self.rearrange, premerge=self.premerge)
+ self.threshold, rearrange=self.rearrange, premerge=self.premerge,
+ tree = self.tree)
self.densities, self.tags = obj.density, obj.chainID
# I'm going to go ahead and delete self.densities because it's not
# actually being used. I'm not going to remove it altogether because
@@ -1780,7 +1783,7 @@
def __init__(self, pf, subvolume=None,threshold=160, dm_only=True, \
resize=True, rearrange=True,\
fancy_padding=True, safety=1.5, premerge=True, sample=0.03, \
- total_mass=None, num_particles=None):
+ total_mass=None, num_particles=None, tree = 'F'):
r"""Parallel HOP halo finder.
Halos are built by:
@@ -1810,9 +1813,16 @@
Default = False.
resize : bool
Turns load-balancing on or off. Default = True.
+ tree : string
+ Chooses which kD Tree to use. The Fortran one (tree = 'F') is
+ faster, but uses more memory. The Cython one (tree = 'C') is
+ slower but is more memory efficient.
+ Default = 'F'.
rearrange : bool
Turns on faster nearest neighbor searches at the cost of increased
- memory usage. Default = True.
+ memory usage.
+ This option only applies when using the Fortran tree.
+ Default = True.
fancy_padding : bool
True calculates padding independently for each face of each
subvolume. Default = True.
@@ -1862,6 +1872,9 @@
self.num_neighbors = 65
self.safety = safety
self.sample = sample
+ self.tree = tree
+ if self.tree != 'F' and self.tree != 'C':
+ mylog.error("No kD Tree specified!")
period = pf.domain_right_edge - pf.domain_left_edge
topbounds = na.array([[0., 0., 0.], period])
# Cut up the volume evenly initially, with no padding.
@@ -1969,7 +1982,8 @@
(LE_padding, RE_padding) = self.padding
parallelHOPHaloList.__init__(self, self._data_source, self.padding, \
self.num_neighbors, self.bounds, total_mass, period, \
- threshold=threshold, dm_only=dm_only, rearrange=rearrange, premerge=premerge)
+ threshold=threshold, dm_only=dm_only, rearrange=rearrange, premerge=premerge,
+ tree = self.tree)
self._join_halolists()
yt_counters("Final Grouping")
@@ -2120,6 +2134,7 @@
mass in the entire volume.
Default = None, which means the total mass is automatically
calculated.
+
Examples
--------
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/analysis_modules/halo_finding/parallel_hop/parallel_hop_interface.py
--- a/yt/analysis_modules/halo_finding/parallel_hop/parallel_hop_interface.py
+++ b/yt/analysis_modules/halo_finding/parallel_hop/parallel_hop_interface.py
@@ -38,6 +38,8 @@
except ImportError:
mylog.debug("The Fortran kD-Tree did not import correctly.")
+from yt.utilities.spatial import cKDTree
+
from yt.utilities.parallel_tools.parallel_analysis_interface import \
parallel_blocking_call, \
ParallelAnalysisInterface
@@ -45,7 +47,7 @@
class ParallelHOPHaloFinder(ParallelAnalysisInterface):
def __init__(self,period, padding, num_neighbors, bounds,
particle_fields, threshold=160.0, rearrange=True,
- premerge=True):
+ premerge=True, tree='F'):
ParallelAnalysisInterface.__init__(self)
self.threshold = threshold
self.rearrange = rearrange
@@ -64,6 +66,7 @@
self.mass = particle_fields.pop("ParticleMassMsun")
self.padded_particles = []
self.nMerge = 4
+ self.tree = tree
yt_counters("chainHOP")
self.max_mem = 0
self.__max_memory()
@@ -342,34 +345,50 @@
Set up the data objects that get passed to the kD-tree code.
"""
yt_counters("init kd tree")
- # Yes, we really do need to initialize this many arrays.
- # They're deleted in _parallelHOP.
- fKD.dens = na.zeros(self.size, dtype='float64', order='F')
- fKD.mass = na.concatenate((self.mass, self.mass_pad))
- del self.mass
- fKD.pos = na.empty((3, self.size), dtype='float64', order='F')
- # This actually copies the data into the fortran space.
- self.psize = self.xpos.size
- fKD.pos[0, :self.psize] = self.xpos
- fKD.pos[1, :self.psize] = self.ypos
- fKD.pos[2, :self.psize] = self.zpos
- del self.xpos, self.ypos, self.zpos
- gc.collect()
- fKD.pos[0, self.psize:] = self.xpos_pad
- fKD.pos[1, self.psize:] = self.ypos_pad
- fKD.pos[2, self.psize:] = self.zpos_pad
- del self.xpos_pad, self.ypos_pad, self.zpos_pad
- gc.collect()
- fKD.qv = na.asfortranarray(na.empty(3, dtype='float64'))
- fKD.nn = self.num_neighbors
- # Plus 2 because we're looking for that neighbor, but only keeping
- # nMerge + 1 neighbor tags, skipping ourselves.
- fKD.nMerge = self.nMerge + 2
- fKD.nparts = self.size
- fKD.sort = True # Slower, but needed in _connect_chains
- fKD.rearrange = self.rearrange # True is faster, but uses more memory
- # Now call the fortran.
- create_tree(0)
+ if self.tree == 'F':
+ # Yes, we really do need to initialize this many arrays.
+ # They're deleted in _parallelHOP.
+ fKD.dens = na.zeros(self.size, dtype='float64', order='F')
+ fKD.mass = na.concatenate((self.mass, self.mass_pad))
+ del self.mass
+ fKD.pos = na.empty((3, self.size), dtype='float64', order='F')
+ # This actually copies the data into the fortran space.
+ self.psize = self.xpos.size
+ fKD.pos[0, :self.psize] = self.xpos
+ fKD.pos[1, :self.psize] = self.ypos
+ fKD.pos[2, :self.psize] = self.zpos
+ del self.xpos, self.ypos, self.zpos
+ gc.collect()
+ fKD.pos[0, self.psize:] = self.xpos_pad
+ fKD.pos[1, self.psize:] = self.ypos_pad
+ fKD.pos[2, self.psize:] = self.zpos_pad
+ del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ gc.collect()
+ fKD.qv = na.asfortranarray(na.empty(3, dtype='float64'))
+ fKD.nn = self.num_neighbors
+ # Plus 2 because we're looking for that neighbor, but only keeping
+ # nMerge + 1 neighbor tags, skipping ourselves.
+ fKD.nMerge = self.nMerge + 2
+ fKD.nparts = self.size
+ fKD.sort = True # Slower, but needed in _connect_chains
+ fKD.rearrange = self.rearrange # True is faster, but uses more memory
+ # Now call the fortran.
+ create_tree(0)
+ elif self.tree == 'C':
+ self.mass = na.concatenate((self.mass, self.mass_pad))
+ self.pos = na.empty((self.size, 3), dtype='float64')
+ self.psize = self.xpos.size
+ self.pos[:self.psize, 0] = self.xpos
+ self.pos[:self.psize, 1] = self.ypos
+ self.pos[:self.psize, 2] = self.zpos
+ del self.xpos, self.ypos, self.zpos
+ gc.collect()
+ self.pos[self.psize:, 0] = self.xpos_pad
+ self.pos[self.psize:, 1] = self.ypos_pad
+ self.pos[self.psize:, 2] = self.zpos_pad
+ del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ gc.collect()
+ self.kdtree = cKDTree(self.pos, leafsize = 32)
self.__max_memory()
yt_counters("init kd tree")
@@ -395,8 +414,12 @@
self.is_inside = ( (points >= LE).all(axis=1) * \
(points < RE).all(axis=1) )
elif round == 'second':
- self.is_inside = ( (fKD.pos.T >= LE).all(axis=1) * \
- (fKD.pos.T < RE).all(axis=1) )
+ if self.tree == 'F':
+ self.is_inside = ( (fKD.pos.T >= LE).all(axis=1) * \
+ (fKD.pos.T < RE).all(axis=1) )
+ elif self.tree == 'C':
+ self.is_inside = ( (self.pos > LE).all(axis=1) * \
+ (self.pos < RE).all(axis=1) )
# Below we find out which particles are in the `annulus', one padding
# distance inside the boundaries. First we find the particles outside
# this inner boundary.
@@ -406,8 +429,12 @@
inner = na.invert( (points >= temp_LE).all(axis=1) * \
(points < temp_RE).all(axis=1) )
elif round == 'second' or round == 'third':
- inner = na.invert( (fKD.pos.T >= temp_LE).all(axis=1) * \
- (fKD.pos.T < temp_RE).all(axis=1) )
+ if self.tree == 'F':
+ inner = na.invert( (fKD.pos.T >= temp_LE).all(axis=1) * \
+ (fKD.pos.T < temp_RE).all(axis=1) )
+ elif self.tree == 'C':
+ inner = na.invert( (self.pos >= temp_LE).all(axis=1) * \
+ (self.pos < temp_RE).all(axis=1) )
if round == 'first':
del points
# After inverting the logic above, we want points that are both
@@ -444,26 +471,44 @@
self.densestNN = na.empty(self.size,dtype='int64')
# We find nearest neighbors in chunks.
chunksize = 10000
- fKD.chunk_tags = na.asfortranarray(na.empty((self.num_neighbors, chunksize), dtype='int64'))
- start = 1 # Fortran counting!
- finish = 0
- while finish < self.size:
- finish = min(finish+chunksize,self.size)
- # Call the fortran. start and finish refer to the data locations
- # in fKD.pos, and specify the range of particles to find nearest
- # neighbors
- fKD.start = start
- fKD.finish = finish
- find_chunk_nearest_neighbors()
- chunk_NNtags = (fKD.chunk_tags[:,:finish-start+1] - 1).transpose()
- # Find the densest nearest neighbors by referencing the already
- # calculated density.
- n_dens = na.take(self.density,chunk_NNtags)
- max_loc = na.argmax(n_dens,axis=1)
- for i in xrange(finish - start + 1): # +1 for fortran counting.
- j = start + i - 1 # -1 for fortran counting.
- self.densestNN[j] = chunk_NNtags[i,max_loc[i]]
- start = finish + 1
+ if self.tree == 'F':
+ fKD.chunk_tags = na.asfortranarray(na.empty((self.num_neighbors, chunksize), dtype='int64'))
+ start = 1 # Fortran counting!
+ finish = 0
+ while finish < self.size:
+ finish = min(finish+chunksize,self.size)
+ # Call the fortran. start and finish refer to the data locations
+ # in fKD.pos, and specify the range of particles to find nearest
+ # neighbors
+ fKD.start = start
+ fKD.finish = finish
+ find_chunk_nearest_neighbors()
+ chunk_NNtags = (fKD.chunk_tags[:,:finish-start+1] - 1).transpose()
+ # Find the densest nearest neighbors by referencing the already
+ # calculated density.
+ n_dens = na.take(self.density,chunk_NNtags)
+ max_loc = na.argmax(n_dens,axis=1)
+ for i in xrange(finish - start + 1): # +1 for fortran counting.
+ j = start + i - 1 # -1 for fortran counting.
+ self.densestNN[j] = chunk_NNtags[i,max_loc[i]]
+ start = finish + 1
+ elif self.tree == 'C':
+ start = 0
+ finish = 0
+ while finish < self.size - 1:
+ finish = min(finish+chunksize, self.size)
+ # Unlike above, this function returns a new chunk_NNtags
+ # that is the right size every time. But this may not actually
+ # be as memory efficient - fragmenting?
+ chunk_NNtags = self.kdtree.find_chunk_nearest_neighbors(start, \
+ finish, num_neighbors=self.num_neighbors)
+ n_dens = na.take(self.density, chunk_NNtags)
+ max_loc = na.argmax(n_dens, axis=1)
+ max_loc = na.argmax(n_dens,axis=1)
+ for i in xrange(finish - start):
+ j = start + i
+ self.densestNN[j] = chunk_NNtags[i,max_loc[i]]
+ start = finish
yt_counters("densestNN")
self.__max_memory()
del chunk_NNtags, max_loc, n_dens
@@ -568,12 +613,15 @@
chain_map = defaultdict(set)
for i in xrange(max(self.chainID)+1):
chain_map[i].add(i)
- # Plus 2 because we're looking for that neighbor, but only keeping
- # nMerge + 1 neighbor tags, skipping ourselves.
- fKD.dist = na.empty(self.nMerge+2, dtype='float64')
- fKD.tags = na.empty(self.nMerge+2, dtype='int64')
- # We can change this here to make the searches faster.
- fKD.nn = self.nMerge+2
+ if self.tree == 'F':
+ # Plus 2 because we're looking for that neighbor, but only keeping
+ # nMerge + 1 neighbor tags, skipping ourselves.
+ fKD.dist = na.empty(self.nMerge+2, dtype='float64')
+ fKD.tags = na.empty(self.nMerge+2, dtype='int64')
+ # We can change this here to make the searches faster.
+ fKD.nn = self.nMerge + 2
+ elif self.tree == 'C':
+ nn = self.nMerge + 2
yt_counters("preconnect kd tree search.")
for i in xrange(self.size):
# Don't consider this particle if it's not part of a chain.
@@ -586,9 +634,13 @@
# We're only connecting >= peakthresh chains now.
if part_max_dens < self.peakthresh: continue
# Loop over nMerge closest nearest neighbors.
- fKD.qv = fKD.pos[:, i]
- find_nn_nearest_neighbors()
- NNtags = fKD.tags[:] - 1
+ if self.tree == 'F':
+ fKD.qv = fKD.pos[:, i]
+ find_nn_nearest_neighbors()
+ NNtags = fKD.tags[:] - 1
+ elif self.tree == 'C':
+ qv = self.pos[i, :]
+ NNtags = self.kdtree.query(qv, nn)[1]
same_count = 0
for j in xrange(int(self.nMerge+1)):
thisNN = NNtags[j+1] # Don't consider ourselves at NNtags[0]
@@ -1002,10 +1054,13 @@
self.chain_densest_n = {} # chainID -> {chainIDs->boundary dens}
# Plus 2 because we're looking for that neighbor, but only keeping
# nMerge + 1 neighbor tags, skipping ourselves.
- fKD.dist = na.empty(self.nMerge+2, dtype='float64')
- fKD.tags = na.empty(self.nMerge+2, dtype='int64')
- # We can change this here to make the searches faster.
- fKD.nn = self.nMerge+2
+ if self.tree == 'F':
+ fKD.dist = na.empty(self.nMerge+2, dtype='float64')
+ fKD.tags = na.empty(self.nMerge+2, dtype='int64')
+ # We can change this here to make the searches faster.
+ fKD.nn = self.nMerge+2
+ elif self.tree == 'C':
+ nn = self.nMerge + 2
for i in xrange(int(self.size)):
# Don't consider this particle if it's not part of a chain.
if self.chainID[i] < 0: continue
@@ -1018,9 +1073,13 @@
# Make sure we're skipping deleted chains.
if part_max_dens == -1.0: continue
# Loop over nMerge closest nearest neighbors.
- fKD.qv = fKD.pos[:, i]
- find_nn_nearest_neighbors()
- NNtags = fKD.tags[:] - 1
+ if self.tree == 'F':
+ fKD.qv = fKD.pos[:, i]
+ find_nn_nearest_neighbors()
+ NNtags = fKD.tags[:] - 1
+ elif self.tree == 'C':
+ qv = self.pos[i, :]
+ NNtags = self.kdtree.query(qv, nn)[1]
for j in xrange(int(self.nMerge+1)):
thisNN = NNtags[j+1] # Don't consider ourselves at NNtags[0]
thisNN_chainID = self.chainID[thisNN]
@@ -1345,11 +1404,14 @@
select = (self.chainID != -1)
calc = len(na.where(select == True)[0])
loc = na.empty((calc, 3), dtype='float64')
- loc[:, 0] = na.concatenate((self.xpos, self.xpos_pad))[select]
- loc[:, 1] = na.concatenate((self.ypos, self.ypos_pad))[select]
- loc[:, 2] = na.concatenate((self.zpos, self.zpos_pad))[select]
- self.__max_memory()
- del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ if self.tree == 'F':
+ loc[:, 0] = na.concatenate((self.xpos, self.xpos_pad))[select]
+ loc[:, 1] = na.concatenate((self.ypos, self.ypos_pad))[select]
+ loc[:, 2] = na.concatenate((self.zpos, self.zpos_pad))[select]
+ self.__max_memory()
+ del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ elif self.tree == 'C':
+ loc = self.pos[select]
subchain = self.chainID[select]
# First we need to find the maximum density point for all groups.
# I think this will be faster than several vector operations that need
@@ -1470,10 +1532,17 @@
# Loop over the particles to find NN for each.
mylog.info('Finding nearest neighbors/density...')
yt_counters("chainHOP_tags_dens")
- chainHOP_tags_dens()
+ if self.tree == 'F':
+ chainHOP_tags_dens()
+ elif self.tree == 'C':
+ self.density = self.kdtree.chainHOP_get_dens(self.mass, \
+ num_neighbors = self.num_neighbors, nMerge = self.nMerge + 2)
yt_counters("chainHOP_tags_dens")
- self.density = fKD.dens.copy()
- # Now each particle has NNtags, and a local self density.
+ if self.tree == 'F':
+ self.density = fKD.dens.copy()
+ elif self.tree == 'C':
+ pass
+ # Now each particle has a local self density.
# Let's find densest NN
mylog.info('Finding densest nearest neighbors...')
self._densestNN()
@@ -1496,17 +1565,22 @@
self._communicate_annulus_chainIDs()
mylog.info('Connecting %d chains into groups...' % self.nchains)
self._connect_chains()
- self.mass = fKD.mass[:self.psize]
- self.mass_pad = fKD.mass[self.psize:]
- del fKD.dens, fKD.mass, fKD.dens
- self.xpos = fKD.pos[0, :self.psize]
- self.ypos = fKD.pos[1, :self.psize]
- self.zpos = fKD.pos[2, :self.psize]
- self.xpos_pad = fKD.pos[0, self.psize:]
- self.ypos_pad = fKD.pos[1, self.psize:]
- self.zpos_pad = fKD.pos[2, self.psize:]
- del fKD.pos, fKD.chunk_tags
- free_tree(0) # Frees the kdtree object.
+ if self.tree == 'F':
+ self.mass = fKD.mass[:self.psize]
+ self.mass_pad = fKD.mass[self.psize:]
+ del fKD.dens, fKD.mass, fKD.dens
+ self.xpos = fKD.pos[0, :self.psize]
+ self.ypos = fKD.pos[1, :self.psize]
+ self.zpos = fKD.pos[2, :self.psize]
+ self.xpos_pad = fKD.pos[0, self.psize:]
+ self.ypos_pad = fKD.pos[1, self.psize:]
+ self.zpos_pad = fKD.pos[2, self.psize:]
+ del fKD.pos, fKD.chunk_tags
+ free_tree(0) # Frees the kdtree object.
+ gc.collect()
+ elif self.tree == 'C':
+ del self.kdtree
+ gc.collect()
del self.densestNN
mylog.info('Communicating group links globally...')
self._make_global_chain_densest_n()
@@ -1530,7 +1604,10 @@
for groupID in self.I_own[taskID]:
self.halo_taskmap[groupID].add(taskID)
del self.I_own
- del self.xpos, self.ypos, self.zpos
+ if self.tree == 'F':
+ del self.xpos, self.ypos, self.zpos
+ elif self.tree == 'C':
+ pass
def __add_to_array(self, arr, key, value, type):
"""
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/setup.py
--- a/yt/utilities/setup.py
+++ b/yt/utilities/setup.py
@@ -148,6 +148,8 @@
config.add_subpackage("delaunay") # From SciPy, written by Robert Kern
config.add_subpackage("kdtree")
config.add_data_files(('kdtree', ['kdtree/fKDpy.so',]))
+ config.add_extension('spatial', ["yt/utilities/spatial/ckdtree.pyx"],
+ libraries=["m"])
config.add_subpackage("parallel_tools")
config.add_extension("data_point_utilities",
"yt/utilities/data_point_utilities.c", libraries=["m"])
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/README
--- /dev/null
+++ b/yt/utilities/spatial/README
@@ -0,0 +1,35 @@
+Stephen Skory
+s at skory.us
+October 2011
+
+This directory is a modified version of the same directory that is part of
+the scipy.spatial package. It has been modified by me in the following
+ways:
+
+- In ckdtree.pyx, distances and searches over the
+ tree both take periodic boundary
+ conditions into account.
+
+- In ckdtree.pyx, all input and output arrays now
+ use 64-bit types: long and double.
+
+- In ckdtree.pyx, I've added two functions specifically for parallel HOP,
+ chainHOP_get_dens and find_chunk_nearest_neighbors.
+
+- In kdtree.py, I've commented out 'import scipy.sparse',
+ which means that any kdtree functionality that uses sparse
+ will not work. This is to avoid needing to build the rest
+ of scipy, which is a challenge and not necessary for just
+ the kdtree.
+
+- I've removed all of the qhull source and functionality.
+
+- I've removed the 'tests' directory.
+
+- I've removed anything having to do with Bento, the
+ python package manager.
+
+Anything that has been removed can be found in the original scipy
+source distribution.
+
+
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/__init__.py
--- /dev/null
+++ b/yt/utilities/spatial/__init__.py
@@ -0,0 +1,34 @@
+"""
+=============================================================
+Spatial algorithms and data structures (:mod:`scipy.spatial`)
+=============================================================
+
+Nearest-neighbor queries:
+
+.. autosummary::
+ :toctree: generated/
+
+ KDTree -- class for efficient nearest-neighbor queries
+ cKDTree -- class for efficient nearest-neighbor queries (faster impl.)
+ distance -- module containing many different distance measures
+
+Delaunay triangulation:
+
+.. autosummary::
+ :toctree: generated/
+
+ Delaunay
+ tsearch
+
+"""
+
+from kdtree import *
+from ckdtree import *
+#from qhull import *
+
+__all__ = filter(lambda s:not s.startswith('_'),dir())
+__all__ += ['distance']
+
+import distance
+from numpy.testing import Tester
+test = Tester().test
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/ckdtree.pyx
--- /dev/null
+++ b/yt/utilities/spatial/ckdtree.pyx
@@ -0,0 +1,758 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import numpy as np
+cimport numpy as np
+cimport stdlib
+cimport cython
+
+import kdtree
+
+cdef double infinity = np.inf
+
+__all__ = ['cKDTree']
+
+
+# priority queue
+cdef union heapcontents:
+ int intdata
+ char* ptrdata
+
+cdef struct heapitem:
+ double priority
+ heapcontents contents
+
+cdef struct heap:
+ int n
+ heapitem* heap
+ int space
+
+cdef inline heapcreate(heap* self,int initial_size):
+ self.space = initial_size
+ self.heap = <heapitem*>stdlib.malloc(sizeof(heapitem)*self.space)
+ self.n=0
+
+cdef inline heapdestroy(heap* self):
+ stdlib.free(self.heap)
+
+cdef inline heapresize(heap* self, int new_space):
+ if new_space<self.n:
+ raise ValueError("Heap containing %d items cannot be resized to %d" % (self.n, new_space))
+ self.space = new_space
+ self.heap = <heapitem*>stdlib.realloc(<void*>self.heap,new_space*sizeof(heapitem))
+
+cdef inline heappush(heap* self, heapitem item):
+ cdef int i
+ cdef heapitem t
+
+ self.n += 1
+ if self.n>self.space:
+ heapresize(self,2*self.space+1)
+
+ i = self.n-1
+ self.heap[i] = item
+ while i>0 and self.heap[i].priority<self.heap[(i-1)//2].priority:
+ t = self.heap[(i-1)//2]
+ self.heap[(i-1)//2] = self.heap[i]
+ self.heap[i] = t
+ i = (i-1)//2
+
+cdef heapitem heappeek(heap* self):
+ return self.heap[0]
+
+cdef heapremove(heap* self):
+ cdef heapitem t
+ cdef int i, j, k, l
+
+ self.heap[0] = self.heap[self.n-1]
+ self.n -= 1
+ if self.n < self.space//4 and self.space>40: #FIXME: magic number
+ heapresize(self,self.space//2+1)
+
+ i=0
+ j=1
+ k=2
+ while ((j<self.n and
+ self.heap[i].priority > self.heap[j].priority or
+ k<self.n and
+ self.heap[i].priority > self.heap[k].priority)):
+ if k<self.n and self.heap[j].priority>self.heap[k].priority:
+ l = k
+ else:
+ l = j
+ t = self.heap[l]
+ self.heap[l] = self.heap[i]
+ self.heap[i] = t
+ i = l
+ j = 2*i+1
+ k = 2*i+2
+
+cdef heapitem heappop(heap* self):
+ cdef heapitem it
+ it = heappeek(self)
+ heapremove(self)
+ return it
+
+
+
+
+
+# utility functions
+cdef inline double dmax(double x, double y):
+ if x>y:
+ return x
+ else:
+ return y
+cdef inline double dabs(double x):
+ if x>0:
+ return x
+ else:
+ return -x
+cdef inline double dmin(double x, double y):
+ if x<y:
+ return x
+ else:
+ return y
+cdef inline double _distance_p(double*x,double*y,double p,int k,double upperbound,
+ double*period):
+ """Compute the distance between x and y
+
+ Computes the Minkowski p-distance to the power p between two points.
+ If the distance**p is larger than upperbound, then any number larger
+ than upperbound may be returned (the calculation is truncated).
+
+ Periodicity added by S. Skory.
+ """
+ cdef int i
+ cdef double r, m
+ r = 0
+ if p==infinity:
+ for i in range(k):
+ m = dmin(dabs(x[i] - y[i]), period[i] - dabs(x[i] - y[i]))
+ r = dmax(r,m)
+ if r>upperbound:
+ return r
+ elif p==1:
+ for i in range(k):
+ m = dmin(dabs(x[i] - y[i]), period[i] - dabs(x[i] - y[i]))
+ r += m
+ if r>upperbound:
+ return r
+ else:
+ for i in range(k):
+ m = dmin(dabs(x[i] - y[i]), period[i] - dabs(x[i] - y[i]))
+ r += m**p
+ if r>upperbound:
+ return r
+ return r
+
+
+
+# Tree structure
+cdef struct innernode:
+ int split_dim
+ int n_points
+ double split
+ double* maxes
+ double* mins
+ innernode* less
+ innernode* greater
+cdef struct leafnode:
+ int split_dim
+ int n_points
+ int start_idx
+ int end_idx
+ double* maxes
+ double* mins
+
+# this is the standard trick for variable-size arrays:
+# malloc sizeof(nodeinfo)+self.m*sizeof(double) bytes.
+cdef struct nodeinfo:
+ innernode* node
+ double side_distances[0]
+
+cdef class cKDTree:
+ """kd-tree for quick nearest-neighbor lookup
+
+ This class provides an index into a set of k-dimensional points
+ which can be used to rapidly look up the nearest neighbors of any
+ point.
+
+ The algorithm used is described in Maneewongvatana and Mount 1999.
+ The general idea is that the kd-tree is a binary trie, each of whose
+ nodes represents an axis-aligned hyperrectangle. Each node specifies
+ an axis and splits the set of points based on whether their coordinate
+ along that axis is greater than or less than a particular value.
+
+ During construction, the axis and splitting point are chosen by the
+ "sliding midpoint" rule, which ensures that the cells do not all
+ become long and thin.
+
+ The tree can be queried for the r closest neighbors of any given point
+ (optionally returning only those within some maximum distance of the
+ point). It can also be queried, with a substantial gain in efficiency,
+ for the r approximate closest neighbors.
+
+ For large dimensions (20 is already large) do not expect this to run
+ significantly faster than brute force. High-dimensional nearest-neighbor
+ queries are a substantial open problem in computer science.
+
+ Parameters
+ ----------
+ data : array-like, shape (n,m)
+ The n data points of dimension m to be indexed. This array is
+ not copied unless this is necessary to produce a contiguous
+ array of doubles, and so modifying this data will result in
+ bogus results.
+ leafsize : positive integer
+ The number of points at which the algorithm switches over to
+ brute-force.
+
+ """
+
+ cdef innernode* tree
+ cdef readonly object data
+ cdef double* raw_data
+ cdef readonly int n, m
+ cdef readonly int leafsize
+ cdef readonly object maxes
+ cdef double* raw_maxes
+ cdef readonly object mins
+ cdef double* raw_mins
+ cdef object indices
+ cdef np.int64_t* raw_indices
+ def __init__(cKDTree self, data, int leafsize=10):
+ cdef np.ndarray[double, ndim=2] inner_data
+ cdef np.ndarray[double, ndim=1] inner_maxes
+ cdef np.ndarray[double, ndim=1] inner_mins
+ cdef np.ndarray[np.int64_t, ndim=1] inner_indices
+ self.data = np.ascontiguousarray(data,dtype=np.double)
+ self.n, self.m = np.shape(self.data)
+ self.leafsize = leafsize
+ if self.leafsize<1:
+ raise ValueError("leafsize must be at least 1")
+ self.maxes = np.ascontiguousarray(np.amax(self.data,axis=0))
+ self.mins = np.ascontiguousarray(np.amin(self.data,axis=0))
+ self.indices = np.ascontiguousarray(np.arange(self.n,dtype=np.int64))
+
+ inner_data = self.data
+ self.raw_data = <double*>inner_data.data
+ inner_maxes = self.maxes
+ self.raw_maxes = <double*>inner_maxes.data
+ inner_mins = self.mins
+ self.raw_mins = <double*>inner_mins.data
+ inner_indices = self.indices
+ self.raw_indices = <np.int64_t*>inner_indices.data
+
+ self.tree = self.__build(0, self.n, self.raw_maxes, self.raw_mins)
+
+ cdef innernode* __build(cKDTree self, int start_idx, int end_idx, double* maxes, double* mins):
+ cdef leafnode* n
+ cdef innernode* ni
+ cdef int i, j, t, p, q, d
+ cdef double size, split, minval, maxval
+ cdef double*mids
+ if end_idx-start_idx<=self.leafsize:
+ n = <leafnode*>stdlib.malloc(sizeof(leafnode))
+ # Skory
+ n.maxes = <double*>stdlib.malloc(sizeof(double)*self.m)
+ n.mins = <double*>stdlib.malloc(sizeof(double)*self.m)
+ for i in range(self.m):
+ n.maxes[i] = maxes[i]
+ n.mins[i] = mins[i]
+ n.split_dim = -1
+ n.start_idx = start_idx
+ n.end_idx = end_idx
+ return <innernode*>n
+ else:
+ d = 0
+ size = 0
+ for i in range(self.m):
+ if maxes[i]-mins[i] > size:
+ d = i
+ size = maxes[i]-mins[i]
+ maxval = maxes[d]
+ minval = mins[d]
+ if maxval==minval:
+ # all points are identical; warn user?
+ n = <leafnode*>stdlib.malloc(sizeof(leafnode))
+ n.split_dim = -1
+ n.start_idx = start_idx
+ n.end_idx = end_idx
+ return <innernode*>n
+
+ split = (maxval+minval)/2
+
+ p = start_idx
+ q = end_idx-1
+ while p<=q:
+ if self.raw_data[self.raw_indices[p]*self.m+d]<split:
+ p+=1
+ elif self.raw_data[self.raw_indices[q]*self.m+d]>=split:
+ q-=1
+ else:
+ t = self.raw_indices[p]
+ self.raw_indices[p] = self.raw_indices[q]
+ self.raw_indices[q] = t
+ p+=1
+ q-=1
+
+ # slide midpoint if necessary
+ if p==start_idx:
+ # no points less than split
+ j = start_idx
+ split = self.raw_data[self.raw_indices[j]*self.m+d]
+ for i in range(start_idx+1, end_idx):
+ if self.raw_data[self.raw_indices[i]*self.m+d]<split:
+ j = i
+ split = self.raw_data[self.raw_indices[j]*self.m+d]
+ t = self.raw_indices[start_idx]
+ self.raw_indices[start_idx] = self.raw_indices[j]
+ self.raw_indices[j] = t
+ p = start_idx+1
+ q = start_idx
+ elif p==end_idx:
+ # no points greater than split
+ j = end_idx-1
+ split = self.raw_data[self.raw_indices[j]*self.m+d]
+ for i in range(start_idx, end_idx-1):
+ if self.raw_data[self.raw_indices[i]*self.m+d]>split:
+ j = i
+ split = self.raw_data[self.raw_indices[j]*self.m+d]
+ t = self.raw_indices[end_idx-1]
+ self.raw_indices[end_idx-1] = self.raw_indices[j]
+ self.raw_indices[j] = t
+ p = end_idx-1
+ q = end_idx-2
+
+ # construct new node representation
+ ni = <innernode*>stdlib.malloc(sizeof(innernode))
+
+ mids = <double*>stdlib.malloc(sizeof(double)*self.m)
+ for i in range(self.m):
+ mids[i] = maxes[i]
+ mids[d] = split
+ ni.less = self.__build(start_idx,p,mids,mins)
+
+ for i in range(self.m):
+ mids[i] = mins[i]
+ mids[d] = split
+ ni.greater = self.__build(p,end_idx,maxes,mids)
+
+ stdlib.free(mids)
+
+ ni.split_dim = d
+ ni.split = split
+ # Skory
+ ni.maxes = <double*>stdlib.malloc(sizeof(double)*self.m)
+ ni.mins = <double*>stdlib.malloc(sizeof(double)*self.m)
+ for i in range(self.m):
+ ni.maxes[i] = maxes[i]
+ ni.mins[i] = mins[i]
+
+ return ni
+
+ cdef __free_tree(cKDTree self, innernode* node):
+ if node.split_dim!=-1:
+ self.__free_tree(node.less)
+ self.__free_tree(node.greater)
+ stdlib.free(node.maxes) # Skory
+ stdlib.free(node.mins)
+ stdlib.free(node)
+
+ def __dealloc__(cKDTree self):
+ if <int>(self.tree) == 0:
+ # should happen only if __init__ was never called
+ return
+ self.__free_tree(self.tree)
+
+ cdef void __query(cKDTree self,
+ double*result_distances,
+ long*result_indices,
+ double*x,
+ int k,
+ double eps,
+ double p,
+ double distance_upper_bound,
+ double*period):
+ # Search the tree for up to k neighbors of the single point x (self.m
+ # doubles) and write them, sorted, into result_distances and
+ # result_indices. Entries for which no neighbor is found are left
+ # untouched, so the caller pre-fills both output buffers.
+ # period holds one value per dimension; it is passed to _distance_p and
+ # is used below when bounding the distance to the far child of a split.
+ # For finite p, distances are carried internally as distance**p.
+ cdef heap q
+ cdef heap neighbors
+
+ cdef int i, j
+ cdef double t
+ cdef nodeinfo* inf
+ cdef nodeinfo* inf2
+ cdef double d
+ cdef double m_left, m_right, m
+ cdef double epsfac
+ cdef double min_distance
+ cdef double far_min_distance
+ cdef heapitem it, it2, neighbor
+ cdef leafnode* node
+ cdef innernode* inode
+ cdef innernode* near
+ cdef innernode* far
+ cdef double* side_distances
+
+ # priority queue for chasing nodes
+ # entries are:
+ # minimum distance between the cell and the target
+ # distances between the nearest side of the cell and the target
+ # the head node of the cell
+ heapcreate(&q,12)
+
+ # priority queue for the nearest neighbors
+ # furthest known neighbor first
+ # entries are (-distance**p, i)
+ heapcreate(&neighbors,k)
+
+ # set up first nodeinfo
+ inf = <nodeinfo*>stdlib.malloc(sizeof(nodeinfo)+self.m*sizeof(double))
+ inf.node = self.tree
+ for i in range(self.m):
+ inf.side_distances[i] = 0
+ t = x[i]-self.raw_maxes[i]
+ if t>inf.side_distances[i]:
+ inf.side_distances[i] = t
+ else:
+ t = self.raw_mins[i]-x[i]
+ if t>inf.side_distances[i]:
+ inf.side_distances[i] = t
+ if p!=1 and p!=infinity:
+ inf.side_distances[i]=inf.side_distances[i]**p
+
+ # compute first distance
+ min_distance = 0.
+ for i in range(self.m):
+ if p==infinity:
+ min_distance = dmax(min_distance,inf.side_distances[i])
+ else:
+ min_distance += inf.side_distances[i]
+
+ # fiddle approximation factor
+ if eps==0:
+ epsfac=1
+ elif p==infinity:
+ epsfac = 1/(1+eps)
+ else:
+ epsfac = 1/(1+eps)**p
+
+ # internally we represent all distances as distance**p
+ if p!=infinity and distance_upper_bound!=infinity:
+ distance_upper_bound = distance_upper_bound**p
+
+ while True:
+ if inf.node.split_dim==-1:
+ # split_dim == -1 marks a leaf: reinterpret the node as a leafnode
+ node = <leafnode*>inf.node
+
+ # brute-force
+ for i in range(node.start_idx,node.end_idx):
+ d = _distance_p(
+ self.raw_data+self.raw_indices[i]*self.m,
+ x,p,self.m,distance_upper_bound,period)
+
+ if d<distance_upper_bound:
+ # replace furthest neighbor
+ if neighbors.n==k:
+ heapremove(&neighbors)
+ neighbor.priority = -d
+ neighbor.contents.intdata = self.raw_indices[i]
+ heappush(&neighbors,neighbor)
+
+ # adjust upper bound for efficiency
+ if neighbors.n==k:
+ distance_upper_bound = -heappeek(&neighbors).priority
+ # done with this node, get another
+ stdlib.free(inf)
+ if q.n==0:
+ # no more nodes to visit
+ break
+ else:
+ it = heappop(&q)
+ inf = <nodeinfo*>it.contents.ptrdata
+ min_distance = it.priority
+ else:
+ inode = <innernode*>inf.node
+
+ # we don't push cells that are too far onto the queue at all,
+ # but since the distance_upper_bound decreases, we might get
+ # here even if the cell's too far
+ if min_distance>distance_upper_bound*epsfac:
+ # since this is the nearest cell, we're done, bail out
+ stdlib.free(inf)
+ # free all the nodes still on the heap
+ for i in range(q.n):
+ stdlib.free(q.heap[i].contents.ptrdata)
+ break
+
+ # set up children for searching
+ if x[inode.split_dim]<inode.split:
+ near = inode.less
+ far = inode.greater
+ else:
+ near = inode.greater
+ far = inode.less
+
+ # near child is at the same distance as the current node
+ # we're going here next, so no point pushing it on the queue
+ # no need to recompute the distance or the side_distances
+ inf.node = near
+
+ # far child is further by an amount depending only
+ # on the split value; compute its distance and side_distances
+ # and push it on the queue if it's near enough
+ inf2 = <nodeinfo*>stdlib.malloc(sizeof(nodeinfo)+self.m*sizeof(double))
+ it2.contents.ptrdata = <char*> inf2
+ inf2.node = far
+
+ # Periodicity added by S Skory
+ # m is the smallest of the direct and the wrapped (period minus
+ # direct) separations between x and the far child's mins/maxes
+ # along the split dimension.
+ m_left = dmin( dabs(far.mins[inode.split_dim] - x[inode.split_dim]), \
+ period[inode.split_dim] - dabs(far.mins[inode.split_dim] - x[inode.split_dim]))
+ m_right = dmin( dabs(far.maxes[inode.split_dim] - x[inode.split_dim]), \
+ period[inode.split_dim] - dabs(far.maxes[inode.split_dim] - x[inode.split_dim]))
+ m = dmin(m_left,m_right)
+
+ # most side distances unchanged
+ for i in range(self.m):
+ inf2.side_distances[i] = inf.side_distances[i]
+
+ # one side distance changes
+ # we can adjust the minimum distance without recomputing
+ if p == infinity:
+ # we never use side_distances in the l_infinity case
+ # inf2.side_distances[inode.split_dim] = dabs(inode.split-x[inode.split_dim])
+ far_min_distance = dmax(min_distance, m)
+ elif p == 1:
+ inf2.side_distances[inode.split_dim] = m
+ far_min_distance = dmax(min_distance, m)
+ else:
+ inf2.side_distances[inode.split_dim] = m**p
+ # The incremental update is disabled here; the bound for the far
+ # child is taken from the split dimension alone.
+ #far_min_distance = min_distance - inf.side_distances[inode.split_dim] + inf2.side_distances[inode.split_dim]
+ far_min_distance = m**p
+
+ it2.priority = far_min_distance
+
+
+ # far child might be too far, if so, don't bother pushing it
+ if far_min_distance<=distance_upper_bound*epsfac:
+ heappush(&q,it2)
+ else:
+ stdlib.free(inf2)
+ # just in case
+ it2.contents.ptrdata = <char*> 0
+
+ # fill output arrays with sorted neighbors
+ # (the heap yields the furthest neighbor first, so fill from the back)
+ for i in range(neighbors.n-1,-1,-1):
+ neighbor = heappop(&neighbors) # FIXME: neighbors may be realloced
+ result_indices[i] = neighbor.contents.intdata
+ if p==1 or p==infinity:
+ result_distances[i] = -neighbor.priority
+ else:
+ # NOTE: the final root is disabled, so for finite p != 1 the value
+ # written is distance**p rather than the distance itself.
+ result_distances[i] = (-neighbor.priority) #**(1./p) S. Skory
+
+ heapdestroy(&q)
+ heapdestroy(&neighbors)
+
+ def query(cKDTree self, object x, int k=1, double eps=0, double p=2,
+           double distance_upper_bound=infinity, object period=None):
+     """query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf,
+     period=None)
+
+     Query the kd-tree for nearest neighbors.
+
+     Parameters
+     ----------
+     x : array_like, last dimension self.m
+         An array of points to query.
+     k : int
+         The number of nearest neighbors to return.
+     eps : non-negative float
+         Return approximate nearest neighbors; the k-th returned value
+         is guaranteed to be no further than (1 + `eps`) times the
+         distance to the real k-th nearest neighbor.
+     p : float, 1 <= p <= infinity
+         Which Minkowski p-norm to use.
+         1 is the sum-of-absolute-values "Manhattan" distance.
+         2 is the usual Euclidean distance.
+         infinity is the maximum-coordinate-difference distance.
+     distance_upper_bound : non-negative float
+         Return only neighbors within this distance. This is used to prune
+         tree searches, so if you are doing a series of nearest-neighbor
+         queries, it may help to supply the distance to the nearest neighbor
+         of the most recent point.
+     period : array_like of length self.m, optional
+         One period per dimension, handed to the distance computation.
+         When omitted, every dimension gets an infinite period.
+
+     Returns
+     -------
+     d : ndarray of floats
+         The distances to the nearest neighbors. For a finite `p` other
+         than 1 the values are the distances raised to the power `p`,
+         because the final root is disabled in ``__query``.
+         If `x` has shape tuple+(self.m,), then `d` has shape tuple+(k,).
+         Missing neighbors are indicated with infinite distances.
+     i : ndarray of ints
+         The locations of the neighbors in self.data.
+         If `x` has shape tuple+(self.m,), then `i` has shape tuple+(k,).
+         Missing neighbors are indicated with self.n.
+
+     Raises
+     ------
+     ValueError
+         If `period` does not have self.m entries, if the last dimension
+         of `x` is not self.m, or if `p` is smaller than 1.
+
+     """
+     cdef np.ndarray[long, ndim=2] ii
+     cdef np.ndarray[double, ndim=2] dd
+     cdef np.ndarray[double, ndim=2] xx
+     cdef np.ndarray[double, ndim=1] cperiod
+     cdef int c
+     x = np.asarray(x).astype(np.double)
+     if period is None:
+         period = np.array([np.inf]*self.m)
+     else:
+         period = np.asarray(period).astype(np.double)
+     cperiod = np.ascontiguousarray(period)
+     # __query indexes period[split_dim] through a raw pointer, so an array
+     # shorter than self.m would be read out of bounds.
+     if cperiod.shape[0] != self.m:
+         raise ValueError("period must have length %d but has shape %s" % (self.m, np.shape(period)))
+     if np.shape(x)[-1] != self.m:
+         raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x)))
+     if p<1:
+         raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+     if len(x.shape)==1:
+         single = True
+         x = x[np.newaxis,:]
+     else:
+         single = False
+     retshape = np.shape(x)[:-1]
+     n = np.prod(retshape)
+     xx = np.reshape(x,(n,self.m))
+     xx = np.ascontiguousarray(xx)
+     # Pre-fill the outputs with the "missing neighbor" markers; __query only
+     # overwrites the slots for neighbors it actually finds.
+     dd = np.empty((n,k),dtype=np.double)
+     dd.fill(infinity)
+     ii = np.empty((n,k),dtype=np.long)
+     ii.fill(self.n)
+     for c in range(n):
+         self.__query(
+             (<double*>dd.data)+c*k,
+             (<long*>ii.data)+c*k,
+             (<double*>xx.data)+c*self.m,
+             k,
+             eps,
+             p,
+             distance_upper_bound,
+             <double*>cperiod.data)
+     if single:
+         if k==1:
+             return dd[0,0], ii[0,0]
+         else:
+             return dd[0], ii[0]
+     else:
+         if k==1:
+             return np.reshape(dd[...,0],retshape), np.reshape(ii[...,0],retshape)
+         else:
+             return np.reshape(dd,retshape+(k,)), np.reshape(ii,retshape+(k,))
+
+ def chainHOP_get_dens(cKDTree self, object mass, int num_neighbors=65, \
+         int nMerge=6):
+     """ Compute the chainHOP density of every particle in the tree from
+     its nearest neighbors.
+
+     Parameters:
+     ===========
+
+     mass: An array-like list of the particle masses, in the same order as
+     the data that went into building the kd tree.
+
+     num_neighbors: Optional, the number of neighbors to search for and to
+     use in the density calculation. Default is 65.
+
+     nMerge: Accepted for compatibility but not used here; the neighbor
+     tags are collected separately by find_chunk_nearest_neighbors.
+
+     Returns:
+     ========
+
+     dens: An array of the densities for each particle, in the same order
+     as the input data.
+
+     """
+     cdef np.ndarray[double, ndim=1] density
+     cdef np.ndarray[double, ndim=1] point
+     cdef np.ndarray[long, ndim=1] nbr_tags
+     cdef np.ndarray[double, ndim=1] nbr_dist
+     cdef int idx, other, slot
+     cdef double ih2, fNorm, r2, taper, weight
+
+     # Contributions are accumulated, so start from zero.
+     density = np.zeros(self.n, dtype=np.double)
+
+     mass = np.ascontiguousarray(np.array(mass).astype(np.double))
+
+     for idx in range(self.n):
+         point = self.data[idx]
+         # The period is fixed to 1.0 in each of three dimensions.
+         (nbr_dist, nbr_tags) = self.query(point, k=num_neighbors, period=[1.]*3)
+
+         # Smoothing scale and normalisation come from the largest value
+         # returned for this particle.
+         ih2 = 4.0/np.max(nbr_dist)
+         fNorm = 0.5*np.sqrt(ih2)*ih2/np.pi
+         for slot in range(num_neighbors):
+             other = nbr_tags[slot]
+             r2 = nbr_dist[slot] * ih2
+             taper = 2.0 - np.sqrt(r2)
+             weight = (1.0 - 0.75*taper*r2) if (r2 < 1.0) else 0.25*taper*taper*taper
+             weight = weight * fNorm
+             # Each pair contributes to both of its members.
+             density[idx] = density[idx] + weight * mass[other]
+             density[other] = density[other] + weight * mass[idx]
+
+     return density
+
+ def find_chunk_nearest_neighbors(cKDTree self, int start, int finish, \
+         int num_neighbors=65):
+     """ Record the nearest-neighbor tags for the data points with indices
+     from start up to (but not including) finish.
+
+     Parameters:
+     ===========
+
+     start: Index of the first point of the dataset to search for.
+
+     finish: Index one past the last point to search for.
+
+     num_neighbors: Optional, the number of neighbors to search for.
+     The default is 65.
+
+     Returns:
+     ========
+
+     chunk_tags: A two-dimensional array with one row per searched point,
+     holding the tags of that point's nearest neighbors.
+
+     """
+     cdef np.ndarray[long, ndim=2] tags_out
+     cdef np.ndarray[double, ndim=1] point
+     cdef np.ndarray[long, ndim=1] nbr_tags
+     cdef np.ndarray[double, ndim=1] nbr_dist
+     cdef int row
+
+     tags_out = np.empty((finish-start, num_neighbors), dtype=np.long)
+
+     for row in range(finish-start):
+         point = self.data[row+start]
+         # The period is fixed to 1.0 in each of three dimensions.
+         (nbr_dist, nbr_tags) = self.query(point, k=num_neighbors, period=[1.]*3)
+         tags_out[row,:] = nbr_tags[:]
+
+     return tags_out
+
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/common.h
--- /dev/null
+++ b/yt/utilities/spatial/common.h
@@ -0,0 +1,70 @@
+/**
+ * common.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved into new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CLUSTER_COMMON_H
+#define _CLUSTER_COMMON_H
+
+/*
+ * Every macro argument is wrapped in parentheses so that expression
+ * arguments (e.g. "a & b" or "c ? x : y") are evaluated as a unit instead
+ * of being regrouped by operator precedence after expansion.
+ * Arguments may still be evaluated more than once; do not pass expressions
+ * with side effects.
+ */
+
+/* Larger / smaller of two values. */
+#define CPY_MAX(_x, _y) (((_x) > (_y)) ? (_x) : (_y))
+#define CPY_MIN(_x, _y) (((_x) < (_y)) ? (_x) : (_y))
+
+/* Number of unordered pairs among _n items. */
+#define NCHOOSE2(_n) ((_n)*((_n)-1)/2)
+
+/* Bit-array helpers: bit i lives in byte i / CPY_BITS_PER_CHAR, counted
+ * from the most significant bit of that byte. */
+#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
+#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
+                                                          CPY_BITS_PER_CHAR))
+#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
+                             ((CPY_BITS_PER_CHAR-1) - \
+                              ((i) % CPY_BITS_PER_CHAR))) & 0x1)
+#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
+                             ((0x1) << ((CPY_BITS_PER_CHAR-1) \
+                                        -((i) % CPY_BITS_PER_CHAR))))
+#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
+                               ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
+                                           -((i) % CPY_BITS_PER_CHAR))))
+
+/* Integer division of x by y, rounded up when the division is inexact. */
+#ifndef CPY_CEIL_DIV
+#define CPY_CEIL_DIV(x, y) ((((double)(x))/(double)(y)) == \
+                            ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1))
+#endif
+
+
+/* Debug output goes to stderr when CPY_DEBUG is defined; the including
+ * file must then provide <stdio.h> for fprintf. */
+#ifdef CPY_DEBUG
+#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define CPY_DEBUG_MSG(...)
+#endif
+
+#endif
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/distance.py
--- /dev/null
+++ b/yt/utilities/spatial/distance.py
@@ -0,0 +1,2178 @@
+"""
+=====================================================
+Distance computations (:mod:`scipy.spatial.distance`)
+=====================================================
+
+.. sectionauthor:: Damian Eads
+
+Function Reference
+------------------
+
+Distance matrix computation from a collection of raw observation vectors
+stored in a rectangular array.
+
+.. autosummary::
+ :toctree: generated/
+
+ pdist -- pairwise distances between observation vectors.
+ cdist -- distances between two collections of observation vectors
+ squareform -- convert distance matrix to a condensed one and vice versa
+
+Predicates for checking the validity of distance matrices, both
+condensed and redundant. Also contained in this module are functions
+for computing the number of observations in a distance matrix.
+
+.. autosummary::
+ :toctree: generated/
+
+ is_valid_dm -- checks for a valid distance matrix
+ is_valid_y -- checks for a valid condensed distance matrix
+ num_obs_dm -- # of observations in a distance matrix
+ num_obs_y -- # of observations in a condensed distance matrix
+
+Distance functions between two vectors ``u`` and ``v``. Computing
+distances over a large collection of vectors is inefficient for these
+functions. Use ``pdist`` for this purpose.
+
+.. autosummary::
+ :toctree: generated/
+
+ braycurtis -- the Bray-Curtis distance.
+ canberra -- the Canberra distance.
+ chebyshev -- the Chebyshev distance.
+ cityblock -- the Manhattan distance.
+ correlation -- the Correlation distance.
+ cosine -- the Cosine distance.
+ dice -- the Dice dissimilarity (boolean).
+ euclidean -- the Euclidean distance.
+ hamming -- the Hamming distance (boolean).
+ jaccard -- the Jaccard distance (boolean).
+ kulsinski -- the Kulsinski distance (boolean).
+ mahalanobis -- the Mahalanobis distance.
+ matching -- the matching dissimilarity (boolean).
+ minkowski -- the Minkowski distance.
+ rogerstanimoto -- the Rogers-Tanimoto dissimilarity (boolean).
+ russellrao -- the Russell-Rao dissimilarity (boolean).
+ seuclidean -- the normalized Euclidean distance.
+ sokalmichener -- the Sokal-Michener dissimilarity (boolean).
+ sokalsneath -- the Sokal-Sneath dissimilarity (boolean).
+ sqeuclidean -- the squared Euclidean distance.
+ yule -- the Yule dissimilarity (boolean).
+
+
+References
+----------
+
+.. [Sta07] "Statistics toolbox." API Reference Documentation. The MathWorks.
+ http://www.mathworks.com/access/helpdesk/help/toolbox/stats/.
+ Accessed October 1, 2007.
+
+.. [Mti07] "Hierarchical clustering." API Reference Documentation.
+ The Wolfram Research, Inc.
+ http://reference.wolfram.com/mathematica/HierarchicalClustering/tutorial/HierarchicalClustering.html.
+ Accessed October 1, 2007.
+
+.. [Gow69] Gower, JC and Ross, GJS. "Minimum Spanning Trees and Single Linkage
+ Cluster Analysis." Applied Statistics. 18(1): pp. 54--64. 1969.
+
+.. [War63] Ward Jr, JH. "Hierarchical grouping to optimize an objective
+ function." Journal of the American Statistical Association. 58(301):
+ pp. 236--44. 1963.
+
+.. [Joh66] Johnson, SC. "Hierarchical clustering schemes." Psychometrika.
+ 32(2): pp. 241--54. 1966.
+
+.. [Sne62] Sneath, PH and Sokal, RR. "Numerical taxonomy." Nature. 193: pp.
+ 855--60. 1962.
+
+.. [Bat95] Batagelj, V. "Comparing resemblance measures." Journal of
+ Classification. 12: pp. 73--90. 1995.
+
+.. [Sok58] Sokal, RR and Michener, CD. "A statistical method for evaluating
+ systematic relationships." Scientific Bulletins. 38(22):
+ pp. 1409--38. 1958.
+
+.. [Ede79] Edelbrock, C. "Mixture model tests of hierarchical clustering
+ algorithms: the problem of classifying everybody." Multivariate
+ Behavioral Research. 14: pp. 367--84. 1979.
+
+.. [Jai88] Jain, A., and Dubes, R., "Algorithms for Clustering Data."
+ Prentice-Hall. Englewood Cliffs, NJ. 1988.
+
+.. [Fis36] Fisher, RA "The use of multiple measurements in taxonomic
+ problems." Annals of Eugenics, 7(2): 179-188. 1936
+
+
+Copyright Notice
+----------------
+
+Copyright (C) Damian Eads, 2007-2008. New BSD License.
+
+"""
+
+import warnings
+import numpy as np
+from numpy.linalg import norm
+
+import _distance_wrap
+
+
+def _copy_array_if_base_present(a):
+ """
+ Copies the array if its base points to a parent array.
+ """
+ if a.base is not None:
+ return a.copy()
+ elif np.issubsctype(a, np.float32):
+ return np.array(a, dtype=np.double)
+ else:
+ return a
+
+
+def _copy_arrays_if_base_present(T):
+ """
+ Accepts a tuple of arrays T. Copies the array T[i] if its base array
+ points to an actual array. Otherwise, the reference is just copied.
+ This is useful if the arrays are being passed to a C function that
+ does not do proper striding.
+ """
+ l = [_copy_array_if_base_present(a) for a in T]
+ return l
+
+
+def _convert_to_bool(X):
+ if X.dtype != np.bool:
+ X = np.bool_(X)
+ if not X.flags.contiguous:
+ X = X.copy()
+ return X
+
+
+def _convert_to_double(X):
+ if X.dtype != np.double:
+ X = np.double(X)
+ if not X.flags.contiguous:
+ X = X.copy()
+ return X
+
+
+def _validate_vector(u, dtype=None):
+ # XXX Is order='c' really necessary?
+ u = np.asarray(u, dtype=dtype, order='c').squeeze()
+ # Ensure values such as u=1 and u=[1] still return 1-D arrays.
+ u = np.atleast_1d(u)
+ if u.ndim > 1:
+ raise ValueError("Input vector should be 1-D.")
+ return u
+
+
+def minkowski(u, v, p):
+ r"""
+ Computes the Minkowski distance between two vectors ``u`` and ``v``,
+ defined as
+
+ .. math::
+
+ {||u-v||}_p = (\sum{|u_i - v_i|^p})^{1/p}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An n-dimensional vector.
+ v : ndarray
+ An n-dimensional vector.
+ p : int
+ The order of the norm of the difference :math:`{||u-v||}_p`.
+
+ Returns
+ -------
+ d : double
+ The Minkowski distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if p < 1:
+ raise ValueError("p must be at least 1")
+ dist = norm(u - v, ord=p)
+ return dist
+
+
+def wminkowski(u, v, p, w):
+ r"""
+ Computes the weighted Minkowski distance between two vectors ``u``
+ and ``v``, defined as
+
+ .. math::
+
+ \left(\sum{(w_i |u_i - v_i|^p)}\right)^{1/p}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+ p : int
+ The order of the norm of the difference :math:`{||u-v||}_p`.
+ w : ndarray
+ The weight vector.
+
+ Returns
+ -------
+ d : double
+ The Minkowski distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ w = _validate_vector(w)
+ if p < 1:
+ raise ValueError("p must be at least 1")
+ dist = norm(w * (u - v), ord=p)
+ return dist
+
+
+def euclidean(u, v):
+ """
+ Computes the Euclidean distance between two n-vectors ``u`` and ``v``,
+ which is defined as
+
+ .. math::
+
+ {||u-v||}_2
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Euclidean distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = norm(u - v)
+ return dist
+
+
+def sqeuclidean(u, v):
+ """
+ Computes the squared Euclidean distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ {||u-v||}_2^2.
+
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The squared Euclidean distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = ((u - v) ** 2).sum()
+ return dist
+
+
+def cosine(u, v):
+ r"""
+ Computes the Cosine distance between two n-vectors u and v, which
+ is defined as
+
+ .. math::
+
+ 1 - \frac{uv^T}
+ {||u||_2 ||v||_2}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Cosine distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = 1.0 - np.dot(u, v) / (norm(u) * norm(v))
+ return dist
+
+
+def correlation(u, v):
+ r"""
+ Computes the correlation distance between two n-vectors ``u`` and
+ ``v``, which is defined as
+
+ .. math::
+
+ 1 - frac{(u - \bar{u}){(v - \bar{v})}^T}
+ {{||(u - \bar{u})||}_2 {||(v - \bar{v})||}_2^T}
+
+ where :math:`\bar{u}` is the mean of a vectors elements and ``n``
+ is the common dimensionality of ``u`` and ``v``.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The correlation distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ umu = u.mean()
+ vmu = v.mean()
+ um = u - umu
+ vm = v - vmu
+ dist = 1.0 - np.dot(um, vm) / (norm(um) * norm(vm))
+ return dist
+
+
+def hamming(u, v):
+ r"""
+ Computes the Hamming distance between two n-vectors ``u`` and
+ ``v``, which is simply the proportion of disagreeing components in
+ ``u`` and ``v``. If ``u`` and ``v`` are boolean vectors, the Hamming
+ distance is
+
+ .. math::
+
+ \frac{c_{01} + c_{10}}{n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Hamming distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ return (u != v).mean()
+
+
+def jaccard(u, v):
+ """
+ Computes the Jaccard-Needham dissimilarity between two boolean
+ n-vectors u and v, which is
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT}}
+ {c_{TT} + c_{FT} + c_{TF}}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Jaccard distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = (np.double(np.bitwise_and((u != v),
+ np.bitwise_or(u != 0, v != 0)).sum())
+ / np.double(np.bitwise_or(u != 0, v != 0).sum()))
+ return dist
+
+
+def kulsinski(u, v):
+ """
+ Computes the Kulsinski dissimilarity between two boolean n-vectors
+ u and v, which is defined as
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT} - c_{TT} + n}
+ {c_{FT} + c_{TF} + n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Kulsinski distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ n = float(len(u))
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+
+ return (ntf + nft - ntt + n) / (ntf + nft + n)
+
+
+def seuclidean(u, v, V):
+ """
+ Returns the standardized Euclidean distance between two n-vectors
+ ``u`` and ``v``. ``V`` is an n-dimensional vector of component
+ variances. It is usually computed among a larger collection
+ vectors.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+ V : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The standardized Euclidean distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ V = _validate_vector(V, dtype=np.float64)
+ if V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
+ raise TypeError('V must be a 1-D array of the same dimension '
+ 'as u and v.')
+ return np.sqrt(((u - v) ** 2 / V).sum())
+
+
+def cityblock(u, v):
+ """
+ Computes the Manhattan distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ \\sum_i {\\left| u_i - v_i \\right|}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The City Block distance between vectors ``u`` and ``v``.
+
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ return abs(u - v).sum()
+
+
+def mahalanobis(u, v, VI):
+ r"""
+ Computes the Mahalanobis distance between two n-vectors ``u`` and ``v``,
+ which is defiend as
+
+ .. math::
+
+ (u-v)V^{-1}(u-v)^T
+
+ where ``VI`` is the inverse covariance matrix :math:`V^{-1}`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Mahalanobis distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ VI = np.atleast_2d(VI)
+ delta = u - v
+ m = np.dot(np.dot(delta, VI), delta)
+ return np.sqrt(m)
+
+
+def chebyshev(u, v):
+ r"""
+ Computes the Chebyshev distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ \max_i {|u_i-v_i|}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Chebyshev distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ return max(abs(u - v))
+
+
+def braycurtis(u, v):
+ r"""
+ Computes the Bray-Curtis distance between two n-vectors ``u`` and
+ ``v``, which is defined as
+
+ .. math::
+
+ \sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
+
+ The Bray-Curtis distance is in the range [0, 1] if all coordinates are
+ positive, and is undefined if the inputs are of length zero.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Bray-Curtis distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v, dtype=np.float64)
+ return abs(u - v).sum() / abs(u + v).sum()
+
+
+def canberra(u, v):
+ r"""
+ Computes the Canberra distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ \sum_u \frac{|u_i-v_i|}
+ {(|u_i|+|v_i|)}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Canberra distance between vectors ``u`` and ``v``.
+
+ Notes
+ -----
+ Whe u[i] and v[i] are 0 for given i, then the fraction 0/0 = 0 is used in
+ the calculation.
+
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v, dtype=np.float64)
+ olderr = np.seterr(invalid='ignore')
+ try:
+ d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
+ finally:
+ np.seterr(**olderr)
+ return d
+
+
+def _nbool_correspond_all(u, v):
+ if u.dtype != v.dtype:
+ raise TypeError("Arrays being compared must be of the same data type.")
+
+ if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+ not_u = 1.0 - u
+ not_v = 1.0 - v
+ nff = (not_u * not_v).sum()
+ nft = (not_u * v).sum()
+ ntf = (u * not_v).sum()
+ ntt = (u * v).sum()
+ elif u.dtype == np.bool:
+ not_u = ~u
+ not_v = ~v
+ nff = (not_u & not_v).sum()
+ nft = (not_u & v).sum()
+ ntf = (u & not_v).sum()
+ ntt = (u & v).sum()
+ else:
+ raise TypeError("Arrays being compared have unknown type.")
+
+ return (nff, nft, ntf, ntt)
+
+
+def _nbool_correspond_ft_tf(u, v):
+ if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+ not_u = 1.0 - u
+ not_v = 1.0 - v
+ nft = (not_u * v).sum()
+ ntf = (u * not_v).sum()
+ else:
+ not_u = ~u
+ not_v = ~v
+ nft = (not_u & v).sum()
+ ntf = (u & not_v).sum()
+ return (nft, ntf)
+
+
+def yule(u, v):
+ r"""
+ Computes the Yule dissimilarity between two boolean n-vectors u and v,
+ which is defined as
+
+
+ .. math::
+
+ \frac{R}{c_{TT} + c_{FF} + \frac{R}{2}}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n` and :math:`R = 2.0 * (c_{TF} + c_{FT})`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Yule dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+ return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft)
+
+
+def matching(u, v):
+ r"""
+ Computes the Matching dissimilarity between two boolean n-vectors
+ u and v, which is defined as
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT}}{n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Matching dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(nft + ntf) / float(len(u))
+
+
+def dice(u, v):
+ r"""
+ Computes the Dice dissimilarity between two boolean n-vectors
+ ``u`` and ``v``, which is
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT}}
+ {2c_{TT} + c_{FT} + c_{TF}}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Dice dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(ntf + nft) / float(2.0 * ntt + ntf + nft)
+
+
+def rogerstanimoto(u, v):
+ r"""
+ Computes the Rogers-Tanimoto dissimilarity between two boolean
+ n-vectors ``u`` and ``v``, which is defined as
+
+ .. math::
+ \frac{R}
+ {c_{TT} + c_{FF} + R}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n` and :math:`R = 2(c_{TF} + c_{FT})`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Rogers-Tanimoto dissimilarity between vectors
+ `u` and `v`.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+ return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft)))
+
+
+def russellrao(u, v):
+    r"""
+    Computes the Russell-Rao dissimilarity between two boolean n-vectors
+    ``u`` and ``v``, which is defined as
+
+    .. math::
+
+      \frac{n - c_{TT}}
+           {n}
+
+    where :math:`c_{ij}` is the number of occurrences of
+    :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+    :math:`k < n`.
+
+    Parameters
+    ----------
+    u : ndarray
+        An :math:`n`-dimensional vector.
+    v : ndarray
+        An :math:`n`-dimensional vector.
+
+    Returns
+    -------
+    d : double
+        The Russell-Rao dissimilarity between vectors ``u`` and ``v``.
+    """
+    u = _validate_vector(u)
+    v = _validate_vector(v)
+    # Compare against ``np.bool_`` (NumPy's boolean scalar type); the bare
+    # ``np.bool`` alias is not provided by every NumPy release.
+    if u.dtype == np.bool_:
+        ntt = (u & v).sum()
+    else:
+        ntt = (u * v).sum()
+    return float(len(u) - ntt) / float(len(u))
+
+
+def sokalmichener(u, v):
+    r"""
+    Computes the Sokal-Michener dissimilarity between two boolean vectors
+    ``u`` and ``v``, which is defined as
+
+    .. math::
+
+       \frac{R}
+            {S + R}
+
+    where :math:`c_{ij}` is the number of occurrences of
+    :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+    :math:`k < n`, :math:`R = 2 * (c_{TF} + c_{FT})` and
+    :math:`S = c_{FF} + c_{TT}`.
+
+    Parameters
+    ----------
+    u : ndarray
+        An :math:`n`-dimensional vector.
+    v : ndarray
+        An :math:`n`-dimensional vector.
+
+    Returns
+    -------
+    d : double
+        The Sokal-Michener dissimilarity between vectors ``u`` and ``v``.
+    """
+    u = _validate_vector(u)
+    v = _validate_vector(v)
+    # Compare against ``np.bool_`` (NumPy's boolean scalar type); the bare
+    # ``np.bool`` alias is not provided by every NumPy release.
+    if u.dtype == np.bool_:
+        ntt = (u & v).sum()
+        nff = (~u & ~v).sum()
+    else:
+        # Non-boolean input: products of the entries (resp. of their
+        # complements) count the both-1 (resp. both-0) positions.
+        ntt = (u * v).sum()
+        nff = ((1.0 - u) * (1.0 - v)).sum()
+    (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+    return float(2.0 * (ntf + nft)) / float(ntt + nff + 2.0 * (ntf + nft))
+
+
+def sokalsneath(u, v):
+    r"""
+    Computes the Sokal-Sneath dissimilarity between two boolean vectors
+    ``u`` and ``v``,
+
+    .. math::
+
+       \frac{R}
+            {c_{TT} + R}
+
+    where :math:`c_{ij}` is the number of occurrences of
+    :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+    :math:`k < n` and :math:`R = 2(c_{TF} + c_{FT})`.
+
+    Parameters
+    ----------
+    u : ndarray
+        An :math:`n`-dimensional vector.
+    v : ndarray
+        An :math:`n`-dimensional vector.
+
+    Returns
+    -------
+    d : double
+        The Sokal-Sneath dissimilarity between vectors ``u`` and ``v``.
+
+    Raises
+    ------
+    ValueError
+        If the denominator :math:`c_{TT} + R` is zero, i.e. both
+        vectors are entirely false.
+    """
+    u = _validate_vector(u)
+    v = _validate_vector(v)
+    # Compare against ``np.bool_`` (NumPy's boolean scalar type); the bare
+    # ``np.bool`` alias is not provided by every NumPy release.
+    if u.dtype == np.bool_:
+        ntt = (u & v).sum()
+    else:
+        ntt = (u * v).sum()
+    (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+    denom = ntt + 2.0 * (ntf + nft)
+    if denom == 0:
+        raise ValueError('Sokal-Sneath dissimilarity is not defined for '
+                         'vectors that are entirely false.')
+    return float(2.0 * (ntf + nft)) / denom
+
+
+def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
+    r"""
+    Computes the pairwise distances between m original observations in
+    n-dimensional space. Returns a condensed distance matrix Y.  For
+    each :math:`i` and :math:`j` (where :math:`i<j<m`), the
+    metric ``dist(u=X[i], v=X[j])`` is computed and stored in the
+    :math:`ij`th entry.
+
+    See ``squareform`` for information on how to calculate the index of
+    this entry or to convert the condensed distance matrix to a
+    redundant square matrix.
+
+    The following are common calling conventions.
+
+    1. ``Y = pdist(X, 'euclidean')``
+
+       Computes the distance between m points using Euclidean distance
+       (2-norm) as the distance metric between the points. The points
+       are arranged as m n-dimensional row vectors in the matrix X.
+
+    2. ``Y = pdist(X, 'minkowski', p)``
+
+       Computes the distances using the Minkowski distance
+       :math:`||u-v||_p` (p-norm) where :math:`p \geq 1`.
+
+    3. ``Y = pdist(X, 'cityblock')``
+
+       Computes the city block or Manhattan distance between the
+       points.
+
+    4. ``Y = pdist(X, 'seuclidean', V=None)``
+
+       Computes the standardized Euclidean distance. The standardized
+       Euclidean distance between two n-vectors ``u`` and ``v`` is
+
+       .. math::
+
+          \sqrt{\sum {(u_i-v_i)^2 / V[x_i]}}.
+
+
+       V is the variance vector; V[i] is the variance computed over all
+          the i'th components of the points. If not passed, it is
+          automatically computed.
+
+    5. ``Y = pdist(X, 'sqeuclidean')``
+
+       Computes the squared Euclidean distance :math:`||u-v||_2^2` between
+       the vectors.
+
+    6. ``Y = pdist(X, 'cosine')``
+
+       Computes the cosine distance between vectors u and v,
+
+       .. math::
+
+          1 - \frac{uv^T}
+                   {{|u|}_2 {|v|}_2}
+
+       where |*|_2 is the 2 norm of its argument *.
+
+    7. ``Y = pdist(X, 'correlation')``
+
+       Computes the correlation distance between vectors u and v. This is
+
+       .. math::
+
+          1 - \frac{(u - \bar{u})(v - \bar{v})^T}
+                   {{|(u - \bar{u})|}{|(v - \bar{v})|}^T}
+
+       where :math:`\bar{v}` is the mean of the elements of vector v.
+
+    8. ``Y = pdist(X, 'hamming')``
+
+       Computes the normalized Hamming distance, or the proportion of
+       those vector elements between two n-vectors ``u`` and ``v``
+       which disagree. To save memory, the matrix ``X`` can be of type
+       boolean.
+
+    9. ``Y = pdist(X, 'jaccard')``
+
+       Computes the Jaccard distance between the points. Given two
+       vectors, ``u`` and ``v``, the Jaccard distance is the
+       proportion of those elements ``u[i]`` and ``v[i]`` that
+       disagree where at least one of them is non-zero.
+
+    10. ``Y = pdist(X, 'chebyshev')``
+
+       Computes the Chebyshev distance between the points. The
+       Chebyshev distance between two n-vectors ``u`` and ``v`` is the
+       maximum norm-1 distance between their respective elements. More
+       precisely, the distance is given by
+
+       .. math::
+
+          d(u,v) = \max_i {|u_i-v_i|}.
+
+    11. ``Y = pdist(X, 'canberra')``
+
+       Computes the Canberra distance between the points. The
+       Canberra distance between two points ``u`` and ``v`` is
+
+       .. math::
+
+         d(u,v) = \sum_i \frac{|u_i-v_i|}
+                              {(|u_i|+|v_i|)}
+
+
+    12. ``Y = pdist(X, 'braycurtis')``
+
+       Computes the Bray-Curtis distance between the points. The
+       Bray-Curtis distance between two points ``u`` and ``v`` is
+
+
+       .. math::
+
+            d(u,v) = \frac{\sum_i {u_i-v_i}}
+                          {\sum_i {u_i+v_i}}
+
+    13. ``Y = pdist(X, 'mahalanobis', VI=None)``
+
+       Computes the Mahalanobis distance between the points. The
+       Mahalanobis distance between two points ``u`` and ``v`` is
+       :math:`(u-v)(1/V)(u-v)^T` where :math:`(1/V)` (the ``VI``
+       variable) is the inverse covariance. If ``VI`` is not None,
+       ``VI`` will be used as the inverse covariance matrix.
+
+    14. ``Y = pdist(X, 'yule')``
+
+       Computes the Yule distance between each pair of boolean
+       vectors. (see yule function documentation)
+
+    15. ``Y = pdist(X, 'matching')``
+
+       Computes the matching distance between each pair of boolean
+       vectors. (see matching function documentation)
+
+    16. ``Y = pdist(X, 'dice')``
+
+       Computes the Dice distance between each pair of boolean
+       vectors. (see dice function documentation)
+
+    17. ``Y = pdist(X, 'kulsinski')``
+
+       Computes the Kulsinski distance between each pair of
+       boolean vectors. (see kulsinski function documentation)
+
+    18. ``Y = pdist(X, 'rogerstanimoto')``
+
+       Computes the Rogers-Tanimoto distance between each pair of
+       boolean vectors. (see rogerstanimoto function documentation)
+
+    19. ``Y = pdist(X, 'russellrao')``
+
+       Computes the Russell-Rao distance between each pair of
+       boolean vectors. (see russellrao function documentation)
+
+    20. ``Y = pdist(X, 'sokalmichener')``
+
+       Computes the Sokal-Michener distance between each pair of
+       boolean vectors. (see sokalmichener function documentation)
+
+    21. ``Y = pdist(X, 'sokalsneath')``
+
+       Computes the Sokal-Sneath distance between each pair of
+       boolean vectors. (see sokalsneath function documentation)
+
+    22. ``Y = pdist(X, 'wminkowski')``
+
+       Computes the weighted Minkowski distance between each pair of
+       vectors. (see wminkowski function documentation)
+
+    23. ``Y = pdist(X, f)``
+
+       Computes the distance between all pairs of vectors in X
+       using the user supplied 2-arity function f. For example,
+       Euclidean distance between the vectors could be computed
+       as follows::
+
+         dm = pdist(X, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
+
+       Note that you should avoid passing a reference to one of
+       the distance functions defined in this library. For example,::
+
+         dm = pdist(X, sokalsneath)
+
+       would calculate the pair-wise distances between the vectors in
+       X using the Python function sokalsneath. This would result in
+       sokalsneath being called :math:`{n \choose 2}` times, which
+       is inefficient. Instead, the optimized C version is more
+       efficient, and we call it using the following syntax.::
+
+         dm = pdist(X, 'sokalsneath')
+
+    Parameters
+    ----------
+    X : ndarray
+        An m by n array of m original observations in an
+        n-dimensional space.
+    metric : string or function
+        The distance metric to use. The distance function can
+        be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
+        'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
+        'jaccard', 'kulsinski', 'mahalanobis', 'matching',
+        'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski',
+        'yule'.
+    w : ndarray
+        The weight vector (for weighted Minkowski).
+    p : double
+        The p-norm to apply (for Minkowski, weighted and unweighted)
+    V : ndarray
+        The variance vector (for standardized Euclidean).
+    VI : ndarray
+        The inverse of the covariance matrix (for Mahalanobis).
+
+    Returns
+    -------
+    Y : ndarray
+        A condensed distance matrix.
+
+    Raises
+    ------
+    ValueError
+        If ``X`` is not 2-dimensional, if weighted Minkowski is
+        requested without ``w``, if ``V`` has the wrong shape, or if
+        the metric name is unknown.
+    TypeError
+        If ``metric`` is neither a string nor a callable, or if ``V``
+        / ``VI`` do not contain doubles.
+
+    See Also
+    --------
+    squareform : converts between condensed distance matrices and
+                 square distance matrices.
+    """
+
+#         21. Y = pdist(X, 'test_Y')
+#
+#           Computes the distance between all pairs of vectors in X
+#           using the distance metric Y but with a more succinct,
+#           verifiable, but less efficient implementation.
+
+    X = np.asarray(X, order='c')
+
+    # The C code doesn't do striding.
+    [X] = _copy_arrays_if_base_present([_convert_to_double(X)])
+
+    s = X.shape
+    if len(s) != 2:
+        raise ValueError('A 2-dimensional array must be passed.')
+
+    m, n = s
+    # m choose 2 entries; floor division keeps the length an integer
+    # regardless of the interpreter's division semantics.
+    dm = np.zeros((m * (m - 1) // 2,), dtype=np.double)
+
+    wmink_names = ['wminkowski', 'wmi', 'wm', 'wpnorm']
+    if w is None and (metric == wminkowski or metric in wmink_names):
+        raise ValueError('weighted minkowski requires a weight '
+                         'vector `w` to be given.')
+
+    if callable(metric):
+        # Library metrics that need extra arguments are wrapped so the
+        # pair loop below can always call ``dfun(u, v)``.
+        if metric == minkowski:
+            def dfun(u, v):
+                return minkowski(u, v, p)
+        elif metric == wminkowski:
+            def dfun(u, v):
+                return wminkowski(u, v, p, w)
+        elif metric == seuclidean:
+            def dfun(u, v):
+                return seuclidean(u, v, V)
+        elif metric == mahalanobis:
+            def dfun(u, v):
+                # Mahalanobis takes the inverse covariance ``VI``, not
+                # the variance vector ``V``.
+                return mahalanobis(u, v, VI)
+        else:
+            dfun = metric
+
+        # Fill the condensed matrix in row-major upper-triangle order.
+        k = 0
+        for i in xrange(0, m - 1):
+            for j in xrange(i + 1, m):
+                dm[k] = dfun(X[i], X[j])
+                k = k + 1
+
+    elif isinstance(metric, basestring):
+        mstr = metric.lower()
+
+        #if X.dtype != np.double and \
+        #       (mstr != 'hamming' and mstr != 'jaccard'):
+        #    TypeError('A double array must be passed.')
+        if mstr in ('euclidean', 'euclid', 'eu', 'e'):
+            _distance_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+        elif mstr in ('sqeuclidean', 'sqe', 'sqeuclid'):
+            _distance_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+            dm = dm ** 2.0
+        elif mstr in ('cityblock', 'cblock', 'cb', 'c'):
+            _distance_wrap.pdist_city_block_wrap(X, dm)
+        elif mstr in ('hamming', 'hamm', 'ha', 'h'):
+            if X.dtype == np.bool_:
+                _distance_wrap.pdist_hamming_bool_wrap(_convert_to_bool(X), dm)
+            else:
+                _distance_wrap.pdist_hamming_wrap(_convert_to_double(X), dm)
+        elif mstr in ('jaccard', 'jacc', 'ja', 'j'):
+            if X.dtype == np.bool_:
+                _distance_wrap.pdist_jaccard_bool_wrap(_convert_to_bool(X), dm)
+            else:
+                _distance_wrap.pdist_jaccard_wrap(_convert_to_double(X), dm)
+        elif mstr in ('chebychev', 'chebyshev', 'cheby', 'cheb', 'ch'):
+            _distance_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
+        elif mstr in ('minkowski', 'mi', 'm'):
+            _distance_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
+        elif mstr in wmink_names:
+            _distance_wrap.pdist_weighted_minkowski_wrap(_convert_to_double(X),
+                                                         dm, p, np.asarray(w))
+        elif mstr in ('seuclidean', 'se', 's'):
+            if V is not None:
+                V = np.asarray(V, order='c')
+                if type(V) != np.ndarray:
+                    raise TypeError('Variance vector V must be a numpy array')
+                if V.dtype != np.double:
+                    raise TypeError('Variance vector V must contain doubles.')
+                if len(V.shape) != 1:
+                    raise ValueError('Variance vector V must '
+                                     'be one-dimensional.')
+                if V.shape[0] != n:
+                    raise ValueError('Variance vector V must be of the same '
+                            'dimension as the vectors on which the distances '
+                            'are computed.')
+                # The C code doesn't do striding.
+                [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
+            else:
+                VV = np.var(X, axis=0, ddof=1)
+            _distance_wrap.pdist_seuclidean_wrap(_convert_to_double(X), VV, dm)
+        # Need to test whether vectorized cosine works better.
+        # Find out: Is there a dot subtraction operator so I can
+        # subtract matrices in a similar way to multiplying them?
+        # Need to get rid of as much unnecessary C code as possible.
+        elif mstr in ('cosine', 'cos'):
+            norms = np.sqrt(np.sum(X * X, axis=1))
+            _distance_wrap.pdist_cosine_wrap(_convert_to_double(X), dm, norms)
+        elif mstr in ('old_cosine', 'old_cos'):
+            norms = np.sqrt(np.sum(X * X, axis=1))
+            nV = norms.reshape(m, 1)
+            # The numerator u * v
+            nm = np.dot(X, X.T)
+            # The denom. ||u||*||v||
+            de = np.dot(nV, nV.T)
+            dm = 1.0 - (nm / de)
+            dm[xrange(0, m), xrange(0, m)] = 0.0
+            dm = squareform(dm)
+        elif mstr in ('correlation', 'co'):
+            # Correlation distance is the cosine distance of the
+            # row-centered observations.
+            X2 = X - X.mean(1)[:, np.newaxis]
+            #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
+            norms = np.sqrt(np.sum(X2 * X2, axis=1))
+            _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2),
+                                             _convert_to_double(dm),
+                                             _convert_to_double(norms))
+        elif mstr in ('mahalanobis', 'mahal', 'mah'):
+            if VI is not None:
+                VI = _convert_to_double(np.asarray(VI, order='c'))
+                if type(VI) != np.ndarray:
+                    raise TypeError('VI must be a numpy array.')
+                if VI.dtype != np.double:
+                    raise TypeError('The array must contain 64-bit floats.')
+                [VI] = _copy_arrays_if_base_present([VI])
+            else:
+                V = np.cov(X.T)
+                VI = _convert_to_double(np.linalg.inv(V).T.copy())
+            # (u-v)V^(-1)(u-v)^T
+            _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X),
+                                                  VI, dm)
+        elif mstr == 'canberra':
+            _distance_wrap.pdist_canberra_wrap(_convert_to_double(X), dm)
+        elif mstr == 'braycurtis':
+            _distance_wrap.pdist_bray_curtis_wrap(_convert_to_double(X), dm)
+        elif mstr == 'yule':
+            _distance_wrap.pdist_yule_bool_wrap(_convert_to_bool(X), dm)
+        elif mstr == 'matching':
+            _distance_wrap.pdist_matching_bool_wrap(_convert_to_bool(X), dm)
+        elif mstr == 'kulsinski':
+            _distance_wrap.pdist_kulsinski_bool_wrap(_convert_to_bool(X), dm)
+        elif mstr == 'dice':
+            _distance_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm)
+        elif mstr == 'rogerstanimoto':
+            _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X),
+                                                          dm)
+        elif mstr == 'russellrao':
+            _distance_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm)
+        elif mstr == 'sokalmichener':
+            _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X),
+                                                         dm)
+        elif mstr == 'sokalsneath':
+            _distance_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm)
+        # The 'test_*' names below re-run pdist with the pure-Python
+        # metric functions (see the comment above the function body).
+        elif metric == 'test_euclidean':
+            dm = pdist(X, euclidean)
+        elif metric == 'test_sqeuclidean':
+            # NOTE(review): despite its name this branch computes the
+            # *standardized* Euclidean distance (seuclidean) - confirm
+            # whether the key was meant to be 'test_seuclidean'.
+            if V is None:
+                V = np.var(X, axis=0, ddof=1)
+            else:
+                V = np.asarray(V, order='c')
+            dm = pdist(X, lambda u, v: seuclidean(u, v, V))
+        elif metric == 'test_braycurtis':
+            dm = pdist(X, braycurtis)
+        elif metric == 'test_mahalanobis':
+            if VI is None:
+                V = np.cov(X.T)
+                VI = np.linalg.inv(V)
+            else:
+                VI = np.asarray(VI, order='c')
+            [VI] = _copy_arrays_if_base_present([VI])
+            # (u-v)V^(-1)(u-v)^T
+            dm = pdist(X, (lambda u, v: mahalanobis(u, v, VI)))
+        elif metric == 'test_canberra':
+            dm = pdist(X, canberra)
+        elif metric == 'test_cityblock':
+            dm = pdist(X, cityblock)
+        elif metric == 'test_minkowski':
+            dm = pdist(X, minkowski, p=p)
+        elif metric == 'test_wminkowski':
+            dm = pdist(X, wminkowski, p=p, w=w)
+        elif metric == 'test_cosine':
+            dm = pdist(X, cosine)
+        elif metric == 'test_correlation':
+            dm = pdist(X, correlation)
+        elif metric == 'test_hamming':
+            dm = pdist(X, hamming)
+        elif metric == 'test_jaccard':
+            dm = pdist(X, jaccard)
+        elif metric == 'test_chebyshev' or metric == 'test_chebychev':
+            dm = pdist(X, chebyshev)
+        elif metric == 'test_yule':
+            dm = pdist(X, yule)
+        elif metric == 'test_matching':
+            dm = pdist(X, matching)
+        elif metric == 'test_dice':
+            dm = pdist(X, dice)
+        elif metric == 'test_kulsinski':
+            dm = pdist(X, kulsinski)
+        elif metric == 'test_rogerstanimoto':
+            dm = pdist(X, rogerstanimoto)
+        elif metric == 'test_russellrao':
+            dm = pdist(X, russellrao)
+        elif metric == 'test_sokalsneath':
+            dm = pdist(X, sokalsneath)
+        elif metric == 'test_sokalmichener':
+            dm = pdist(X, sokalmichener)
+        else:
+            raise ValueError('Unknown Distance Metric: %s' % mstr)
+    else:
+        raise TypeError('2nd argument metric must be a string identifier '
+                        'or a function.')
+    return dm
+
+
+def squareform(X, force="no", checks=True):
+    r"""
+    Converts a vector-form distance vector to a square-form distance
+    matrix, and vice-versa.
+
+    Parameters
+    ----------
+    X : ndarray
+        Either a condensed or redundant distance matrix.
+    force : string
+        As with MATLAB(TM), if force is equal to 'tovector' or
+        'tomatrix', the input will be treated as a distance matrix
+        or distance vector respectively.
+    checks : bool
+        If ``checks`` is set to ``False``, no checks will be made
+        for matrix symmetry nor zero diagonals. This is useful if
+        it is known that ``X - X.T`` is small and ``diag(X)`` is
+        close to zero. These values are ignored any way so they do
+        not disrupt the squareform transformation.
+
+    Returns
+    -------
+    Y : ndarray
+        If a condensed distance matrix is passed, a redundant
+        one is returned, or if a redundant one is passed, a
+        condensed distance matrix is returned.
+
+    Raises
+    ------
+    ValueError
+        If ``force`` contradicts the dimensionality of ``X``, if a
+        vector's length is not a binomial coefficient, if a matrix is
+        not square, or if ``X`` has more than two dimensions.
+    TypeError
+        If ``X`` does not contain doubles after conversion.
+
+    Calling Conventions
+    -------------------
+
+    1. v = squareform(X)
+
+       Given a square d by d symmetric distance matrix ``X``,
+       ``v=squareform(X)`` returns a :math:`d*(d-1)/2` (or
+       `${n \choose 2}$`) sized vector v.
+
+      v[{n \choose 2}-{n-i \choose 2} + (j-i-1)] is the distance
+      between points i and j. If X is non-square or asymmetric, an error
+      is returned.
+
+    2. X = squareform(v)
+
+      Given a d*(d-1)/2 sized v for some integer d>=2 encoding distances
+      as described, X=squareform(v) returns a d by d distance matrix X. The
+      X[i, j] and X[j, i] values are set to
+      v[{n \choose 2}-{n-i \choose 2} + (j-i-1)] and all
+      diagonal elements are zero.
+
+    """
+
+    X = _convert_to_double(np.asarray(X, order='c'))
+
+    if not np.issubdtype(X.dtype, np.double):
+        raise TypeError('A double array must be passed.')
+
+    s = X.shape
+
+    if force.lower() == 'tomatrix':
+        if len(s) != 1:
+            raise ValueError("Forcing 'tomatrix' but input X is not a "
+                             "distance vector.")
+    elif force.lower() == 'tovector':
+        if len(s) != 2:
+            raise ValueError("Forcing 'tovector' but input X is not a "
+                             "distance matrix.")
+
+    # X = squareform(v)
+    if len(s) == 1:
+        if X.shape[0] == 0:
+            return np.zeros((1, 1), dtype=np.double)
+
+        # Grab the closest value to the square root of the number
+        # of elements times 2 to see if the number of elements
+        # is indeed a binomial coefficient.
+        d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
+
+        # Check that v is of valid dimensions.  d * (d - 1) is always
+        # even, so floor division is exact.
+        if d * (d - 1) // 2 != int(s[0]):
+            raise ValueError('Incompatible vector size. It must be a binomial '
+                             'coefficient n choose 2 for some integer n >= 2.')
+
+        # Allocate memory for the distance matrix.
+        M = np.zeros((d, d), dtype=np.double)
+
+        # Since the C code does not support striding using strides.
+        # The dimensions are used instead.
+        [X] = _copy_arrays_if_base_present([X])
+
+        # Fill in the values of the distance matrix.
+        _distance_wrap.to_squareform_from_vector_wrap(M, X)
+
+        # Return the distance matrix.
+        M = M + M.transpose()
+        return M
+    elif len(s) == 2:
+        if s[0] != s[1]:
+            raise ValueError('The matrix argument must be square.')
+        if checks:
+            is_valid_dm(X, throw=True, name='X')
+
+        # One-side of the dimensions is set here.
+        d = s[0]
+
+        if d <= 1:
+            return np.array([], dtype=np.double)
+
+        # Create a vector.  Floor division keeps the length an integer
+        # regardless of the interpreter's division semantics.
+        v = np.zeros(((d * (d - 1) // 2),), dtype=np.double)
+
+        # Since the C code does not support striding using strides.
+        # The dimensions are used instead.
+        [X] = _copy_arrays_if_base_present([X])
+
+        # Convert the vector to squareform.
+        _distance_wrap.to_vector_from_squareform_wrap(X, v)
+        return v
+    else:
+        raise ValueError(('The first argument must be one or two dimensional '
+                         'array. A %d-dimensional array is not '
+                         'permitted') % len(s))
+
+
+def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False):
+    """
+    Returns True if the variable D passed is a valid distance matrix.
+    Distance matrices must be 2-dimensional numpy arrays containing
+    doubles. They must have a zero-diagonal, and they must be symmetric.
+
+    Parameters
+    ----------
+    D : ndarray
+        The candidate object to test for validity.
+    tol : double
+        The distance matrix should be symmetric. tol is the maximum
+        difference between the :math:`ij`th entry and the
+        :math:`ji`th entry for the distance metric to be
+        considered symmetric.
+    throw : bool
+        An exception is thrown if the distance matrix passed is not
+        valid.
+    name : string
+        the name of the variable to checked. This is useful if
+        throw is set to ``True`` so the offending variable can be
+        identified in the exception message when an exception is
+        thrown.
+    warning : bool
+        Instead of throwing an exception, a warning message is
+        raised.
+
+    Returns
+    -------
+    valid : bool
+        ``True`` if the variable ``D`` passed is a valid
+        distance matrix.  Small numerical differences in ``D`` and
+        ``D.T`` and non-zeroness of the diagonal are ignored if they are
+        within the tolerance specified by ``tol``.
+
+    Raises
+    ------
+    TypeError
+        If ``throw`` is true and ``D`` does not contain doubles.
+    ValueError
+        If ``throw`` is true and ``D`` is not two-dimensional, not
+        symmetric, or has a non-zero diagonal.
+    """
+    D = np.asarray(D, order='c')
+    valid = True
+    try:
+        s = D.shape
+        if D.dtype != np.double:
+            if name:
+                raise TypeError(('Distance matrix \'%s\' must contain doubles '
+                                 '(double).') % name)
+            else:
+                raise TypeError('Distance matrix must contain doubles '
+                                '(double).')
+        if len(D.shape) != 2:
+            if name:
+                raise ValueError(('Distance matrix \'%s\' must have shape=2 '
+                                 '(i.e. be two-dimensional).') % name)
+            else:
+                raise ValueError('Distance matrix must have shape=2 (i.e. '
+                                 'be two-dimensional).')
+        # Index pairs (0, 0), (1, 1), ... used to pick out the diagonal.
+        diag_idx = np.arange(s[0])
+        if tol == 0.0:
+            if not (D == D.T).all():
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' must be '
+                                     'symmetric.') % name)
+                else:
+                    raise ValueError('Distance matrix must be symmetric.')
+            if not (D[diag_idx, diag_idx] == 0).all():
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' diagonal must '
+                                     'be zero.') % name)
+                else:
+                    raise ValueError('Distance matrix diagonal must be zero.')
+        else:
+            # D - D.T holds both d_ij - d_ji and d_ji - d_ij, so the
+            # one-sided comparison bounds the absolute difference.
+            if not (D - D.T <= tol).all():
+                if name:
+                    # %5.5f rather than %d so a fractional tolerance is
+                    # not truncated in the message.
+                    raise ValueError(('Distance matrix \'%s\' must be '
+                                      'symmetric within tolerance %5.5f.')
+                                     % (name, tol))
+                else:
+                    raise ValueError('Distance matrix must be symmetric within'
+                                     ' tolerance %5.5f.' % tol)
+            if not (D[diag_idx, diag_idx] <= tol).all():
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' diagonal must be'
+                                      ' close to zero within tolerance %5.5f.')
+                                     % (name, tol))
+                else:
+                    # No name available: the message must not contain a
+                    # '%s' placeholder, only the tolerance.
+                    raise ValueError(('Distance matrix diagonal must be'
+                                      ' close to zero within tolerance %5.5f.')
+                                     % tol)
+    except Exception as e:
+        # Any failure (including ones raised by the comparisons
+        # themselves) marks the matrix invalid unless ``throw`` is set.
+        if throw:
+            raise
+        if warning:
+            warnings.warn(str(e))
+        valid = False
+    return valid
+
+
+def is_valid_y(y, warning=False, throw=False, name=None):
+    r"""
+    Returns ``True`` if the variable ``y`` passed is a valid condensed
+    distance matrix. Condensed distance matrices must be 1-dimensional
+    numpy arrays containing doubles. Their length must be a binomial
+    coefficient :math:`{n \choose 2}` for some positive integer n.
+
+
+    Parameters
+    ----------
+    y : ndarray
+        The condensed distance matrix.
+    warning : bool, optional
+        Invokes a warning if the variable passed is not a valid
+        condensed distance matrix. The warning message explains why
+        the distance matrix is not valid.  'name' is used when
+        referencing the offending variable.
+    throw : bool, optional
+        Throws an exception if the variable passed is not a valid
+        condensed distance matrix.
+    name : string, optional
+        Used when referencing the offending variable in the
+        warning or exception message.
+
+    Returns
+    -------
+    valid : bool
+        ``True`` if ``y`` is a valid condensed distance matrix,
+        ``False`` otherwise (when ``throw`` is not set).
+
+    Raises
+    ------
+    TypeError
+        If ``throw`` is true and ``y`` does not contain doubles.
+    ValueError
+        If ``throw`` is true and ``y`` is not one-dimensional or its
+        length is not a binomial coefficient.
+    """
+    y = np.asarray(y, order='c')
+    valid = True
+    try:
+        if type(y) != np.ndarray:
+            if name:
+                raise TypeError(('\'%s\' passed as a condensed distance '
+                                 'matrix is not a numpy array.') % name)
+            else:
+                raise TypeError('Variable is not a numpy array.')
+        if y.dtype != np.double:
+            if name:
+                raise TypeError(('Condensed distance matrix \'%s\' must '
+                                 'contain doubles (double).') % name)
+            else:
+                raise TypeError('Condensed distance matrix must contain '
+                                'doubles (double).')
+        if len(y.shape) != 1:
+            if name:
+                raise ValueError(('Condensed distance matrix \'%s\' must '
+                                  'have shape=1 (i.e. be one-dimensional).')
+                                 % name)
+            else:
+                raise ValueError('Condensed distance matrix must have shape=1 '
+                                 '(i.e. be one-dimensional).')
+        n = y.shape[0]
+        # Candidate number of observations: the smallest d with
+        # d * d >= 2 * n; n is valid only if d choose 2 equals n.
+        d = int(np.ceil(np.sqrt(n * 2)))
+        if (d * (d - 1) / 2) != n:
+            # The backslash is escaped so the message text is unchanged
+            # without relying on an unrecognized '\c' escape.
+            if name:
+                raise ValueError(('Length n of condensed distance matrix '
+                                  '\'%s\' must be a binomial coefficient, i.e.'
+                                  'there must be a k such that '
+                                  '(k \\choose 2)=n)!') % name)
+            else:
+                raise ValueError('Length n of condensed distance matrix must '
+                                 'be a binomial coefficient, i.e. there must '
+                                 'be a k such that (k \\choose 2)=n)!')
+    except Exception as e:
+        if throw:
+            raise
+        if warning:
+            warnings.warn(str(e))
+        valid = False
+    return valid
+
+
+def num_obs_dm(d):
+    """
+    Number of original observations behind a square, redundant
+    distance matrix.
+
+    Parameters
+    ----------
+    d : ndarray
+        The square (redundant) distance matrix to inspect.
+
+    Returns
+    -------
+    numobs : int
+        The size of the first dimension of ``d``, i.e. the number of
+        observations in the redundant distance matrix.
+
+    Raises
+    ------
+    TypeError, ValueError
+        Propagated from ``is_valid_dm`` when ``d`` is rejected.
+    """
+    dist_matrix = np.asarray(d, order='c')
+    # Validation is run with an infinite tolerance and throw=True, so a
+    # rejected matrix surfaces as an exception rather than a False.
+    is_valid_dm(dist_matrix, tol=np.inf, throw=True, name='d')
+    n_rows = dist_matrix.shape[0]
+    return n_rows
+
+
+def num_obs_y(Y):
+    """
+    Number of original observations behind a condensed distance
+    matrix ``Y``.
+
+    Parameters
+    ----------
+    Y : ndarray
+        The condensed distance matrix.
+
+    Returns
+    -------
+    n : int
+        The number of observations in the condensed distance matrix
+        passed.
+
+    Raises
+    ------
+    ValueError
+        If ``Y`` is empty or its length is not ``n choose 2`` for some
+        integer ``n``; validation errors from ``is_valid_y`` are
+        propagated as well.
+    """
+    Y = np.asarray(Y, order='c')
+    is_valid_y(Y, throw=True, name='Y')
+    length = Y.shape[0]
+    if length == 0:
+        raise ValueError("The number of observations cannot be determined on "
+                         "an empty distance matrix.")
+    # Invert length = n * (n - 1) / 2: take the ceiling of sqrt(2 * length)
+    # and verify that it reproduces the length exactly.
+    n_obs = int(np.ceil(np.sqrt(length * 2)))
+    expected_length = n_obs * (n_obs - 1) / 2
+    if expected_length != length:
+        raise ValueError("Invalid condensed distance matrix passed. Must be "
+                         "some k where k=(n choose 2) for some n >= 2.")
+    return n_obs
+
+
+def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
+ r"""
+ Computes distance between each pair of observation vectors in the
+ Cartesian product of two collections of vectors. ``XA`` is a
+ :math:`m_A` by :math:`n` array while ``XB`` is a :math:`m_B` by
+ :math:`n` array. A :math:`m_A` by :math:`m_B` array is
+ returned. An exception is thrown if ``XA`` and ``XB`` do not have
+ the same number of columns.
+
+ A rectangular distance matrix ``Y`` is returned. For each :math:`i`
+ and :math:`j`, the metric ``dist(u=XA[i], v=XB[j])`` is computed
+ and stored in the :math:`ij` th entry.
+
+ The following are common calling conventions:
+
+ 1. ``Y = cdist(XA, XB, 'euclidean')``
+
+ Computes the distance between :math:`m` points using
+ Euclidean distance (2-norm) as the distance metric between the
+ points. The points are arranged as :math:`m`
+ :math:`n`-dimensional row vectors in the matrix X.
+
+ 2. ``Y = cdist(XA, XB, 'minkowski', p)``
+
+ Computes the distances using the Minkowski distance
+ :math:`||u-v||_p` (:math:`p`-norm) where :math:`p \geq 1`.
+
+ 3. ``Y = cdist(XA, XB, 'cityblock')``
+
+ Computes the city block or Manhattan distance between the
+ points.
+
+ 4. ``Y = cdist(XA, XB, 'seuclidean', V=None)``
+
+ Computes the standardized Euclidean distance. The standardized
+ Euclidean distance between two n-vectors ``u`` and ``v`` is
+
+ .. math::
+
+ \sqrt{\sum {(u_i-v_i)^2 / V[x_i]}}.
+
+ V is the variance vector; V[i] is the variance computed over all
+ the i'th components of the points. If not passed, it is
+ automatically computed.
+
+ 5. ``Y = cdist(XA, XB, 'sqeuclidean')``
+
+ Computes the squared Euclidean distance :math:`||u-v||_2^2` between
+ the vectors.
+
+ 6. ``Y = cdist(XA, XB, 'cosine')``
+
+ Computes the cosine distance between vectors u and v,
+
+ .. math::
+
+ \frac{1 - uv^T}
+ {{|u|}_2 {|v|}_2}
+
+ where :math:`|*|_2` is the 2-norm of its argument *.
+
+ 7. ``Y = cdist(XA, XB, 'correlation')``
+
+ Computes the correlation distance between vectors u and v. This is
+
+ .. math::
+
+ \frac{1 - (u - n{|u|}_1){(v - n{|v|}_1)}^T}
+ {{|(u - n{|u|}_1)|}_2 {|(v - n{|v|}_1)|}^T}
+
+ where :math:`|*|_1` is the Manhattan (or 1-norm) of its
+ argument, and :math:`n` is the common dimensionality of the
+ vectors.
+
+ 8. ``Y = cdist(XA, XB, 'hamming')``
+
+ Computes the normalized Hamming distance, or the proportion of
+ those vector elements between two n-vectors ``u`` and ``v``
+ which disagree. To save memory, the matrix ``X`` can be of type
+ boolean.
+
+ 9. ``Y = cdist(XA, XB, 'jaccard')``
+
+ Computes the Jaccard distance between the points. Given two
+ vectors, ``u`` and ``v``, the Jaccard distance is the
+ proportion of those elements ``u[i]`` and ``v[i]`` that
+ disagree where at least one of them is non-zero.
+
+ 10. ``Y = cdist(XA, XB, 'chebyshev')``
+
+ Computes the Chebyshev distance between the points. The
+ Chebyshev distance between two n-vectors ``u`` and ``v`` is the
+ maximum norm-1 distance between their respective elements. More
+ precisely, the distance is given by
+
+ .. math::
+
+ d(u,v) = \max_i {|u_i-v_i|}.
+
+ 11. ``Y = cdist(XA, XB, 'canberra')``
+
+ Computes the Canberra distance between the points. The
+ Canberra distance between two points ``u`` and ``v`` is
+
+ .. math::
+
+ d(u,v) = \sum_u \frac{|u_i-v_i|}
+ {(|u_i|+|v_i|)}
+
+
+ 12. ``Y = cdist(XA, XB, 'braycurtis')``
+
+ Computes the Bray-Curtis distance between the points. The
+ Bray-Curtis distance between two points ``u`` and ``v`` is
+
+
+ .. math::
+
+ d(u,v) = \frac{\sum_i (u_i-v_i)}
+ {\sum_i (u_i+v_i)}
+
+ 13. ``Y = cdist(XA, XB, 'mahalanobis', VI=None)``
+
+ Computes the Mahalanobis distance between the points. The
+ Mahalanobis distance between two points ``u`` and ``v`` is
+ :math:`(u-v)(1/V)(u-v)^T` where :math:`(1/V)` (the ``VI``
+ variable) is the inverse covariance. If ``VI`` is not None,
+ ``VI`` will be used as the inverse covariance matrix.
+
+ 14. ``Y = cdist(XA, XB, 'yule')``
+
+ Computes the Yule distance between the boolean
+ vectors. (see yule function documentation)
+
+ 15. ``Y = cdist(XA, XB, 'matching')``
+
+ Computes the matching distance between the boolean
+ vectors. (see matching function documentation)
+
+ 16. ``Y = cdist(XA, XB, 'dice')``
+
+ Computes the Dice distance between the boolean vectors. (see
+ dice function documentation)
+
+ 17. ``Y = cdist(XA, XB, 'kulsinski')``
+
+ Computes the Kulsinski distance between the boolean
+ vectors. (see kulsinski function documentation)
+
+ 18. ``Y = cdist(XA, XB, 'rogerstanimoto')``
+
+ Computes the Rogers-Tanimoto distance between the boolean
+ vectors. (see rogerstanimoto function documentation)
+
+ 19. ``Y = cdist(XA, XB, 'russellrao')``
+
+ Computes the Russell-Rao distance between the boolean
+ vectors. (see russellrao function documentation)
+
+ 20. ``Y = cdist(XA, XB, 'sokalmichener')``
+
+ Computes the Sokal-Michener distance between the boolean
+ vectors. (see sokalmichener function documentation)
+
+ 21. ``Y = cdist(XA, XB, 'sokalsneath')``
+
+ Computes the Sokal-Sneath distance between the vectors. (see
+ sokalsneath function documentation)
+
+
+ 22. ``Y = cdist(XA, XB, 'wminkowski')``
+
+ Computes the weighted Minkowski distance between the
+ vectors. (see sokalsneath function documentation)
+
+ 23. ``Y = cdist(XA, XB, f)``
+
+ Computes the distance between each pair of vectors, one taken
+ from XA and one from XB, using the user supplied 2-arity
+ function f. For example, Euclidean distance between the
+ vectors could be computed as follows::
+
+ dm = cdist(XA, XB, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
+
+ Note that you should avoid passing a reference to one of
+ the distance functions defined in this library. For example,::
+
+ dm = cdist(XA, XB, sokalsneath)
+
+ would calculate the pair-wise distances between the vectors in
+ XA and XB using the Python function sokalsneath. This would
+ result in sokalsneath being called :math:`m_A m_B` times, which
+ is inefficient. Instead, the optimized C version is more
+ efficient, and we call it using the following syntax::
+
+ dm = cdist(XA, XB, 'sokalsneath')
+
+ Parameters
+ ----------
+ XA : ndarray
+ An :math:`m_A` by :math:`n` array of :math:`m_A`
+ original observations in an :math:`n`-dimensional space.
+ XB : ndarray
+ An :math:`m_B` by :math:`n` array of :math:`m_B`
+ original observations in an :math:`n`-dimensional space.
+ metric : string or function
+ The distance metric to use. The distance function can
+ be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
+ 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
+ 'jaccard', 'kulsinski', 'mahalanobis', 'matching',
+ 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+ 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski',
+ 'yule'.
+ w : ndarray
+ The weight vector (for weighted Minkowski).
+ p : double
+ The p-norm to apply (for Minkowski, weighted and unweighted)
+ V : ndarray
+ The variance vector (for standardized Euclidean).
+ VI : ndarray
+ The inverse of the covariance matrix (for Mahalanobis).
+
+
+ Returns
+ -------
+ Y : ndarray
+ A :math:`m_A` by :math:`m_B` distance matrix.
+ """
+
+# 21. Y = cdist(XA, XB, 'test_Y')
+#
+# Computes the distance between all pairs of vectors in X
+# using the distance metric Y but with a more succinct,
+# verifiable, but less efficient implementation.
+
+ XA = np.asarray(XA, order='c')
+ XB = np.asarray(XB, order='c')
+
+ #if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
+ # raise TypeError('Floating point arrays must be 64-bit (got %r).' %
+ # (X.dtype.type,))
+
+ # The C code doesn't do striding.
+ [XA] = _copy_arrays_if_base_present([_convert_to_double(XA)])
+ [XB] = _copy_arrays_if_base_present([_convert_to_double(XB)])
+
+ s = XA.shape
+ sB = XB.shape
+
+ if len(s) != 2:
+ raise ValueError('XA must be a 2-dimensional array.')
+ if len(sB) != 2:
+ raise ValueError('XB must be a 2-dimensional array.')
+ if s[1] != sB[1]:
+ raise ValueError('XA and XB must have the same number of columns '
+ '(i.e. feature dimension.)')
+
+ mA = s[0]
+ mB = sB[0]
+ n = s[1]
+ dm = np.zeros((mA, mB), dtype=np.double)
+
+ if callable(metric):
+ if metric == minkowski:
+ for i in xrange(0, mA):
+ for j in xrange(0, mB):
+ dm[i, j] = minkowski(XA[i, :], XB[j, :], p)
+ elif metric == wminkowski:
+ for i in xrange(0, mA):
+ for j in xrange(0, mB):
+ dm[i, j] = wminkowski(XA[i, :], XB[j, :], p, w)
+ elif metric == seuclidean:
+ for i in xrange(0, mA):
+ for j in xrange(0, mB):
+ dm[i, j] = seuclidean(XA[i, :], XB[j, :], V)
+ elif metric == mahalanobis:
+ for i in xrange(0, mA):
+ for j in xrange(0, mB):
+ dm[i, j] = mahalanobis(XA[i, :], XB[j, :], V)
+ else:
+ for i in xrange(0, mA):
+ for j in xrange(0, mB):
+ dm[i, j] = metric(XA[i, :], XB[j, :])
+ elif isinstance(metric, basestring):
+ mstr = metric.lower()
+
+ #if XA.dtype != np.double and \
+ # (mstr != 'hamming' and mstr != 'jaccard'):
+ # TypeError('A double array must be passed.')
+ if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
+ _distance_wrap.cdist_euclidean_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr in set(['sqeuclidean', 'sqe', 'sqeuclid']):
+ _distance_wrap.cdist_euclidean_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ dm **= 2.0
+ elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
+ _distance_wrap.cdist_city_block_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
+ if XA.dtype == np.bool:
+ _distance_wrap.cdist_hamming_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB),
+ dm)
+ else:
+ _distance_wrap.cdist_hamming_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
+ if XA.dtype == np.bool:
+ _distance_wrap.cdist_jaccard_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB),
+ dm)
+ else:
+ _distance_wrap.cdist_jaccard_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
+ _distance_wrap.cdist_chebyshev_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr in set(['minkowski', 'mi', 'm', 'pnorm']):
+ _distance_wrap.cdist_minkowski_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm, p)
+ elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']):
+ _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(XA),
+ _convert_to_double(XB),
+ dm, p,
+ _convert_to_double(w))
+ elif mstr in set(['seuclidean', 'se', 's']):
+ if V is not None:
+ V = np.asarray(V, order='c')
+ if type(V) != np.ndarray:
+ raise TypeError('Variance vector V must be a numpy array')
+ if V.dtype != np.double:
+ raise TypeError('Variance vector V must contain doubles.')
+ if len(V.shape) != 1:
+ raise ValueError('Variance vector V must be '
+ 'one-dimensional.')
+ if V.shape[0] != n:
+ raise ValueError('Variance vector V must be of the same '
+ 'dimension as the vectors on which the '
+ 'distances are computed.')
+ # The C code doesn't do striding.
+ [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
+ else:
+ X = np.vstack([XA, XB])
+ VV = np.var(X, axis=0, ddof=1)
+ X = None
+ del X
+ _distance_wrap.cdist_seuclidean_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), VV, dm)
+ # Need to test whether vectorized cosine works better.
+ # Find out: Is there a dot subtraction operator so I can
+ # subtract matrices in a similar way to multiplying them?
+ # Need to get rid of as much unnecessary C code as possible.
+ elif mstr in set(['cosine', 'cos']):
+ normsA = np.sqrt(np.sum(XA * XA, axis=1))
+ normsB = np.sqrt(np.sum(XB * XB, axis=1))
+ _distance_wrap.cdist_cosine_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm,
+ normsA,
+ normsB)
+ elif mstr in set(['correlation', 'co']):
+ XA2 = XA - XA.mean(1)[:, np.newaxis]
+ XB2 = XB - XB.mean(1)[:, np.newaxis]
+ #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
+ normsA = np.sqrt(np.sum(XA2 * XA2, axis=1))
+ normsB = np.sqrt(np.sum(XB2 * XB2, axis=1))
+ _distance_wrap.cdist_cosine_wrap(_convert_to_double(XA2),
+ _convert_to_double(XB2),
+ _convert_to_double(dm),
+ _convert_to_double(normsA),
+ _convert_to_double(normsB))
+ elif mstr in set(['mahalanobis', 'mahal', 'mah']):
+ if VI is not None:
+ VI = _convert_to_double(np.asarray(VI, order='c'))
+ if type(VI) != np.ndarray:
+ raise TypeError('VI must be a numpy array.')
+ if VI.dtype != np.double:
+ raise TypeError('The array must contain 64-bit floats.')
+ [VI] = _copy_arrays_if_base_present([VI])
+ else:
+ X = np.vstack([XA, XB])
+ V = np.cov(X.T)
+ X = None
+ del X
+ VI = _convert_to_double(np.linalg.inv(V).T.copy())
+ # (u-v)V^(-1)(u-v)^T
+ _distance_wrap.cdist_mahalanobis_wrap(_convert_to_double(XA),
+ _convert_to_double(XB),
+ VI, dm)
+ elif mstr == 'canberra':
+ _distance_wrap.cdist_canberra_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr == 'braycurtis':
+ _distance_wrap.cdist_bray_curtis_wrap(_convert_to_double(XA),
+ _convert_to_double(XB), dm)
+ elif mstr == 'yule':
+ _distance_wrap.cdist_yule_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB), dm)
+ elif mstr == 'matching':
+ _distance_wrap.cdist_matching_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB), dm)
+ elif mstr == 'kulsinski':
+ _distance_wrap.cdist_kulsinski_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB), dm)
+ elif mstr == 'dice':
+ _distance_wrap.cdist_dice_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB), dm)
+ elif mstr == 'rogerstanimoto':
+ _distance_wrap.cdist_rogerstanimoto_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB),
+ dm)
+ elif mstr == 'russellrao':
+ _distance_wrap.cdist_russellrao_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB), dm)
+ elif mstr == 'sokalmichener':
+ _distance_wrap.cdist_sokalmichener_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB),
+ dm)
+ elif mstr == 'sokalsneath':
+ _distance_wrap.cdist_sokalsneath_bool_wrap(_convert_to_bool(XA),
+ _convert_to_bool(XB),
+ dm)
+ elif metric == 'test_euclidean':
+ dm = cdist(XA, XB, euclidean)
+ elif metric == 'test_seuclidean':
+ if V is None:
+ V = np.var(np.vstack([XA, XB]), axis=0, ddof=1)
+ else:
+ V = np.asarray(V, order='c')
+ dm = cdist(XA, XB, lambda u, v: seuclidean(u, v, V))
+ elif metric == 'test_sqeuclidean':
+ dm = cdist(XA, XB, lambda u, v: sqeuclidean(u, v))
+ elif metric == 'test_braycurtis':
+ dm = cdist(XA, XB, braycurtis)
+ elif metric == 'test_mahalanobis':
+ if VI is None:
+ X = np.vstack([XA, XB])
+ V = np.cov(X.T)
+ VI = np.linalg.inv(V)
+ X = None
+ del X
+ else:
+ VI = np.asarray(VI, order='c')
+ [VI] = _copy_arrays_if_base_present([VI])
+ # (u-v)V^(-1)(u-v)^T
+ dm = cdist(XA, XB, (lambda u, v: mahalanobis(u, v, VI)))
+ elif metric == 'test_canberra':
+ dm = cdist(XA, XB, canberra)
+ elif metric == 'test_cityblock':
+ dm = cdist(XA, XB, cityblock)
+ elif metric == 'test_minkowski':
+ dm = cdist(XA, XB, minkowski, p=p)
+ elif metric == 'test_wminkowski':
+ dm = cdist(XA, XB, wminkowski, p=p, w=w)
+ elif metric == 'test_cosine':
+ dm = cdist(XA, XB, cosine)
+ elif metric == 'test_correlation':
+ dm = cdist(XA, XB, correlation)
+ elif metric == 'test_hamming':
+ dm = cdist(XA, XB, hamming)
+ elif metric == 'test_jaccard':
+ dm = cdist(XA, XB, jaccard)
+ elif metric == 'test_chebyshev' or metric == 'test_chebychev':
+ dm = cdist(XA, XB, chebyshev)
+ elif metric == 'test_yule':
+ dm = cdist(XA, XB, yule)
+ elif metric == 'test_matching':
+ dm = cdist(XA, XB, matching)
+ elif metric == 'test_dice':
+ dm = cdist(XA, XB, dice)
+ elif metric == 'test_kulsinski':
+ dm = cdist(XA, XB, kulsinski)
+ elif metric == 'test_rogerstanimoto':
+ dm = cdist(XA, XB, rogerstanimoto)
+ elif metric == 'test_russellrao':
+ dm = cdist(XA, XB, russellrao)
+ elif metric == 'test_sokalsneath':
+ dm = cdist(XA, XB, sokalsneath)
+ elif metric == 'test_sokalmichener':
+ dm = cdist(XA, XB, sokalmichener)
+ else:
+ raise ValueError('Unknown Distance Metric: %s' % mstr)
+ else:
+ raise TypeError('2nd argument metric must be a string identifier '
+ 'or a function.')
+ return dm
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/kdtree.py
--- /dev/null
+++ b/yt/utilities/spatial/kdtree.py
@@ -0,0 +1,881 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import sys
+import numpy as np
+from heapq import heappush, heappop
+#import scipy.sparse
+
+__all__ = ['minkowski_distance_p', 'minkowski_distance',
+ 'distance_matrix',
+ 'Rectangle', 'KDTree']
+
+
+def minkowski_distance_p(x, y, p=2):
+ """
+ Compute the p-th power of the L**p distance between x and y.
+
+ For efficiency, this function computes the L**p distance but does
+ not extract the pth root. If p is 1 or infinity, this is equal to
+ the actual L**p distance.
+
+ Parameters
+ ----------
+ x : array_like, M by K
+
+ y : array_like, N by K
+
+ p : float, 1 <= p <= infinity
+ Which Minkowski p-norm to use.
+
+ Examples
+ --------
+ >>> minkowski_distance_p([[0,0],[0,0]], [[1,1],[0,1]])
+ array([2, 1])
+
+ """
+ x = np.asarray(x)
+ y = np.asarray(y)
+ if p == np.inf:
+ return np.amax(np.abs(y-x), axis=-1)
+ elif p == 1:
+ return np.sum(np.abs(y-x), axis=-1)
+ else:
+ return np.sum(np.abs(y-x)**p, axis=-1)
+
+def minkowski_distance(x, y, p=2):
+ """
+ Compute the L**p distance between x and y.
+
+ Parameters
+ ----------
+ x : array_like, M by K
+
+ y : array_like, N by K
+
+ p : float, 1 <= p <= infinity
+ Which Minkowski p-norm to use.
+
+ Examples
+ --------
+ >>> minkowski_distance([[0,0],[0,0]], [[1,1],[0,1]])
+ array([ 1.41421356, 1. ])
+
+ """
+ x = np.asarray(x)
+ y = np.asarray(y)
+ if p == np.inf or p == 1:
+ return minkowski_distance_p(x, y, p)
+ else:
+ return minkowski_distance_p(x, y, p)**(1./p)
+
+class Rectangle(object):
+ """Hyperrectangle class.
+
+ Represents a Cartesian product of intervals.
+ """
+ def __init__(self, maxes, mins):
+ """Construct a hyperrectangle."""
+ self.maxes = np.maximum(maxes,mins).astype(np.float)
+ self.mins = np.minimum(maxes,mins).astype(np.float)
+ self.m, = self.maxes.shape
+
+ def __repr__(self):
+ return "<Rectangle %s>" % zip(self.mins, self.maxes)
+
+ def volume(self):
+ """Total volume."""
+ return np.prod(self.maxes-self.mins)
+
+ def split(self, d, split):
+ """Produce two hyperrectangles by splitting along axis d.
+
+ In general, if you need to compute maximum and minimum
+ distances to the children, it can be done more efficiently
+ by updating the maximum and minimum distances to the parent.
+ """ # FIXME: do this
+ mid = np.copy(self.maxes)
+ mid[d] = split
+ less = Rectangle(self.mins, mid)
+ mid = np.copy(self.mins)
+ mid[d] = split
+ greater = Rectangle(mid, self.maxes)
+ return less, greater
+
+ def min_distance_point(self, x, p=2.):
+ """Compute the minimum distance between x and a point in the hyperrectangle."""
+ return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-x,x-self.maxes)),p)
+
+ def max_distance_point(self, x, p=2.):
+ """Compute the maximum distance between x and a point in the hyperrectangle."""
+ return minkowski_distance(0, np.maximum(self.maxes-x,x-self.mins),p)
+
+ def min_distance_rectangle(self, other, p=2.):
+ """Compute the minimum distance between points in the two hyperrectangles."""
+ return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-other.maxes,other.mins-self.maxes)),p)
+
+ def max_distance_rectangle(self, other, p=2.):
+ """Compute the maximum distance between points in the two hyperrectangles."""
+ return minkowski_distance(0, np.maximum(self.maxes-other.mins,other.maxes-self.mins),p)
+
+
+class KDTree(object):
+ """
+ kd-tree for quick nearest-neighbor lookup
+
+ This class provides an index into a set of k-dimensional points
+ which can be used to rapidly look up the nearest neighbors of any
+ point.
+
+ The algorithm used is described in Maneewongvatana and Mount 1999.
+ The general idea is that the kd-tree is a binary tree, each of whose
+ nodes represents an axis-aligned hyperrectangle. Each node specifies
+ an axis and splits the set of points based on whether their coordinate
+ along that axis is greater than or less than a particular value.
+
+ During construction, the axis and splitting point are chosen by the
+ "sliding midpoint" rule, which ensures that the cells do not all
+ become long and thin.
+
+ The tree can be queried for the k closest neighbors of any given point
+ (optionally returning only those within some maximum distance of the
+ point). It can also be queried, with a substantial gain in efficiency,
+ for the k approximate closest neighbors.
+
+ For large dimensions (20 is already large) do not expect this to run
+ significantly faster than brute force. High-dimensional nearest-neighbor
+ queries are a substantial open problem in computer science.
+
+ The tree also supports all-neighbors queries, both with arrays of points
+ and with other kd-trees. These do use a reasonably efficient algorithm,
+ but the kd-tree is not necessarily the best data structure for this
+ sort of calculation.
+
+ """
+ def __init__(self, data, leafsize=10):
+ """Construct a kd-tree.
+
+ Parameters
+ ----------
+ data : array_like, shape (n,k)
+ The data points to be indexed. This array is not copied, and
+ so modifying this data will result in bogus results.
+ leafsize : positive int
+ The number of points at which the algorithm switches over to
+ brute-force.
+ """
+ self.data = np.asarray(data)
+ self.n, self.m = np.shape(self.data)
+ self.leafsize = int(leafsize)
+ if self.leafsize<1:
+ raise ValueError("leafsize must be at least 1")
+ self.maxes = np.amax(self.data,axis=0)
+ self.mins = np.amin(self.data,axis=0)
+
+ self.tree = self.__build(np.arange(self.n), self.maxes, self.mins)
+
+ class node(object):
+ if sys.version_info[0] >= 3:
+ def __lt__(self, other): id(self) < id(other)
+ def __gt__(self, other): id(self) > id(other)
+ def __le__(self, other): id(self) <= id(other)
+ def __ge__(self, other): id(self) >= id(other)
+ def __eq__(self, other): id(self) == id(other)
+
+ class leafnode(node):
+ def __init__(self, idx):
+ self.idx = idx
+ self.children = len(idx)
+
+ class innernode(node):
+ def __init__(self, split_dim, split, less, greater):
+ self.split_dim = split_dim
+ self.split = split
+ self.less = less
+ self.greater = greater
+ self.children = less.children+greater.children
+
+ def __build(self, idx, maxes, mins):
+ if len(idx)<=self.leafsize:
+ return KDTree.leafnode(idx)
+ else:
+ data = self.data[idx]
+ #maxes = np.amax(data,axis=0)
+ #mins = np.amin(data,axis=0)
+ d = np.argmax(maxes-mins)
+ maxval = maxes[d]
+ minval = mins[d]
+ if maxval==minval:
+ # all points are identical; warn user?
+ return KDTree.leafnode(idx)
+ data = data[:,d]
+
+ # sliding midpoint rule; see Maneewongvatana and Mount 1999
+ # for arguments that this is a good idea.
+ split = (maxval+minval)/2
+ less_idx = np.nonzero(data<=split)[0]
+ greater_idx = np.nonzero(data>split)[0]
+ if len(less_idx)==0:
+ split = np.amin(data)
+ less_idx = np.nonzero(data<=split)[0]
+ greater_idx = np.nonzero(data>split)[0]
+ if len(greater_idx)==0:
+ split = np.amax(data)
+ less_idx = np.nonzero(data<split)[0]
+ greater_idx = np.nonzero(data>=split)[0]
+ if len(less_idx)==0:
+ # _still_ zero? all must have the same value
+ if not np.all(data==data[0]):
+ raise ValueError("Troublesome data array: %s" % data)
+ split = data[0]
+ less_idx = np.arange(len(data)-1)
+ greater_idx = np.array([len(data)-1])
+
+ lessmaxes = np.copy(maxes)
+ lessmaxes[d] = split
+ greatermins = np.copy(mins)
+ greatermins[d] = split
+ return KDTree.innernode(d, split,
+ self.__build(idx[less_idx],lessmaxes,mins),
+ self.__build(idx[greater_idx],maxes,greatermins))
+
+ def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+
+ side_distances = np.maximum(0,np.maximum(x-self.maxes,self.mins-x))
+ if p!=np.inf:
+ side_distances**=p
+ min_distance = np.sum(side_distances)
+ else:
+ min_distance = np.amax(side_distances)
+
+ # priority queue for chasing nodes
+ # entries are:
+ # minimum distance between the cell and the target
+ # distances between the nearest side of the cell and the target
+ # the head node of the cell
+ q = [(min_distance,
+ tuple(side_distances),
+ self.tree)]
+ # priority queue for the nearest neighbors
+ # furthest known neighbor first
+ # entries are (-distance**p, i)
+ neighbors = []
+
+ if eps==0:
+ epsfac=1
+ elif p==np.inf:
+ epsfac = 1/(1+eps)
+ else:
+ epsfac = 1/(1+eps)**p
+
+ if p!=np.inf and distance_upper_bound!=np.inf:
+ distance_upper_bound = distance_upper_bound**p
+
+ while q:
+ min_distance, side_distances, node = heappop(q)
+ if isinstance(node, KDTree.leafnode):
+ # brute-force
+ data = self.data[node.idx]
+ ds = minkowski_distance_p(data,x[np.newaxis,:],p)
+ for i in range(len(ds)):
+ if ds[i]<distance_upper_bound:
+ if len(neighbors)==k:
+ heappop(neighbors)
+ heappush(neighbors, (-ds[i], node.idx[i]))
+ if len(neighbors)==k:
+ distance_upper_bound = -neighbors[0][0]
+ else:
+ # we don't push cells that are too far onto the queue at all,
+ # but since the distance_upper_bound decreases, we might get
+ # here even if the cell's too far
+ if min_distance>distance_upper_bound*epsfac:
+ # since this is the nearest cell, we're done, bail out
+ break
+ # compute minimum distances to the children and push them on
+ if x[node.split_dim]<node.split:
+ near, far = node.less, node.greater
+ else:
+ near, far = node.greater, node.less
+
+ # near child is at the same distance as the current node
+ heappush(q,(min_distance, side_distances, near))
+
+ # far child is further by an amount depending only
+ # on the split value
+ sd = list(side_distances)
+ if p == np.inf:
+ min_distance = max(min_distance, abs(node.split-x[node.split_dim]))
+ elif p == 1:
+ sd[node.split_dim] = np.abs(node.split-x[node.split_dim])
+ min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+ else:
+ sd[node.split_dim] = np.abs(node.split-x[node.split_dim])**p
+ min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+
+ # far child might be too far, if so, don't bother pushing it
+ if min_distance<=distance_upper_bound*epsfac:
+ heappush(q,(min_distance, tuple(sd), far))
+
+ if p==np.inf:
+ return sorted([(-d,i) for (d,i) in neighbors])
+ else:
+ return sorted([((-d)**(1./p),i) for (d,i) in neighbors])
+
+ def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+ """
+ Query the kd-tree for nearest neighbors
+
+ Parameters
+ ----------
+ x : array_like, last dimension self.m
+ An array of points to query.
+ k : integer
+ The number of nearest neighbors to return.
+ eps : nonnegative float
+ Return approximate nearest neighbors; the kth returned value
+ is guaranteed to be no further than (1+eps) times the
+ distance to the real kth nearest neighbor.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use.
+ 1 is the sum-of-absolute-values "Manhattan" distance
+ 2 is the usual Euclidean distance
+ infinity is the maximum-coordinate-difference distance
+ distance_upper_bound : nonnegative float
+ Return only neighbors within this distance. This is used to prune
+ tree searches, so if you are doing a series of nearest-neighbor
+ queries, it may help to supply the distance to the nearest neighbor
+ of the most recent point.
+
+ Returns
+ -------
+ d : array of floats
+ The distances to the nearest neighbors.
+ If x has shape tuple+(self.m,), then d has shape tuple if
+ k is one, or tuple+(k,) if k is larger than one. Missing
+ neighbors are indicated with infinite distances. If k is None,
+ then d is an object array of shape tuple, containing lists
+ of distances. In either case the hits are sorted by distance
+ (nearest first).
+ i : array of integers
+ The locations of the neighbors in self.data. i is the same
+ shape as d.
+
+ Examples
+ --------
+ >>> from scipy.spatial import KDTree
+ >>> x, y = np.mgrid[0:5, 2:8]
+ >>> tree = KDTree(zip(x.ravel(), y.ravel()))
+ >>> tree.data
+ array([[0, 2],
+ [0, 3],
+ [0, 4],
+ [0, 5],
+ [0, 6],
+ [0, 7],
+ [1, 2],
+ [1, 3],
+ [1, 4],
+ [1, 5],
+ [1, 6],
+ [1, 7],
+ [2, 2],
+ [2, 3],
+ [2, 4],
+ [2, 5],
+ [2, 6],
+ [2, 7],
+ [3, 2],
+ [3, 3],
+ [3, 4],
+ [3, 5],
+ [3, 6],
+ [3, 7],
+ [4, 2],
+ [4, 3],
+ [4, 4],
+ [4, 5],
+ [4, 6],
+ [4, 7]])
+ >>> pts = np.array([[0, 0], [2.1, 2.9]])
+ >>> tree.query(pts)
+ (array([ 2. , 0.14142136]), array([ 0, 13]))
+
+ """
+ x = np.asarray(x)
+ if np.shape(x)[-1] != self.m:
+ raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x)))
+ if p<1:
+ raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+ retshape = np.shape(x)[:-1]
+ if retshape!=():
+ if k is None:
+ dd = np.empty(retshape,dtype=np.object)
+ ii = np.empty(retshape,dtype=np.object)
+ elif k>1:
+ dd = np.empty(retshape+(k,),dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape+(k,),dtype=np.int)
+ ii.fill(self.n)
+ elif k==1:
+ dd = np.empty(retshape,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape,dtype=np.int)
+ ii.fill(self.n)
+ else:
+ raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+ for c in np.ndindex(retshape):
+ hits = self.__query(x[c], k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k is None:
+ dd[c] = [d for (d,i) in hits]
+ ii[c] = [i for (d,i) in hits]
+ elif k>1:
+ for j in range(len(hits)):
+ dd[c+(j,)], ii[c+(j,)] = hits[j]
+ elif k==1:
+ if len(hits)>0:
+ dd[c], ii[c] = hits[0]
+ else:
+ dd[c] = np.inf
+ ii[c] = self.n
+ return dd, ii
+ else:
+ hits = self.__query(x, k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k is None:
+ return [d for (d,i) in hits], [i for (d,i) in hits]
+ elif k==1:
+ if len(hits)>0:
+ return hits[0]
+ else:
+ return np.inf, self.n
+ elif k>1:
+ dd = np.empty(k,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(k,dtype=np.int)
+ ii.fill(self.n)
+ for j in range(len(hits)):
+ dd[j], ii[j] = hits[j]
+ return dd, ii
+ else:
+ raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+
+
+ def __query_ball_point(self, x, r, p=2., eps=0):
+ R = Rectangle(self.maxes, self.mins)
+
+ def traverse_checking(node, rect):
+ if rect.min_distance_point(x, p) > r / (1. + eps):
+ return []
+ elif rect.max_distance_point(x, p) < r * (1. + eps):
+ return traverse_no_checking(node)
+ elif isinstance(node, KDTree.leafnode):
+ d = self.data[node.idx]
+ return node.idx[minkowski_distance(d, x, p) <= r].tolist()
+ else:
+ less, greater = rect.split(node.split_dim, node.split)
+ return traverse_checking(node.less, less) + \
+ traverse_checking(node.greater, greater)
+
+ def traverse_no_checking(node):
+ if isinstance(node, KDTree.leafnode):
+ return node.idx.tolist()
+ else:
+ return traverse_no_checking(node.less) + \
+ traverse_no_checking(node.greater)
+
+ return traverse_checking(self.tree, R)
+
+ def query_ball_point(self, x, r, p=2., eps=0):
+ """Find all points within distance r of point(s) x.
+
+ Parameters
+ ----------
+ x : array_like, shape tuple + (self.m,)
+ The point or points to search for neighbors of.
+ r : positive float
+ The radius of points to return.
+ p : float, optional
+ Which Minkowski p-norm to use. Should be in the range [1, inf].
+ eps : nonnegative float, optional
+ Approximate search. Branches of the tree are not explored if their
+ nearest points are further than ``r / (1 + eps)``, and branches are
+ added in bulk if their furthest points are nearer than
+ ``r * (1 + eps)``.
+
+ Returns
+ -------
+ results : list or array of lists
+ If `x` is a single point, returns a list of the indices of the
+ neighbors of `x`. If `x` is an array of points, returns an object
+ array of shape tuple containing lists of neighbors.
+
+ Notes
+ -----
+ If you have many points whose neighbors you want to find, you may save
+ substantial amounts of time by putting them in a KDTree and using
+ query_ball_tree.
+
+ Examples
+ --------
+ >>> from scipy import spatial
+ >>> x, y = np.mgrid[0:4, 0:4]
+ >>> points = zip(x.ravel(), y.ravel())
+ >>> tree = spatial.KDTree(points)
+ >>> tree.query_ball_point([2, 0], 1)
+ [4, 8, 9, 12]
+
+ """
+ x = np.asarray(x)
+ if x.shape[-1] != self.m:
+ raise ValueError("Searching for a %d-dimensional point in a " \
+ "%d-dimensional KDTree" % (x.shape[-1], self.m))
+ if len(x.shape) == 1:
+ return self.__query_ball_point(x, r, p, eps)
+ else:
+ retshape = x.shape[:-1]
+ result = np.empty(retshape, dtype=np.object)
+ for c in np.ndindex(retshape):
+ result[c] = self.__query_ball_point(x[c], r, p=p, eps=eps)
+ return result
+
+ def query_ball_tree(self, other, r, p=2., eps=0):
+ """Find all pairs of points whose distance is at most r
+
+ Parameters
+ ==========
+
+ other : KDTree
+ The tree containing points to search against
+ r : positive float
+ The maximum distance
+ p : float 1<=p<=infinity
+ Which Minkowski norm to use
+ eps : nonnegative float
+ Approximate search. Branches of the tree are not explored
+ if their nearest points are further than r/(1+eps), and branches
+ are added in bulk if their furthest points are nearer than r*(1+eps).
+
+ Returns
+ =======
+
+ results : list of lists
+ For each element self.data[i] of this tree, results[i] is a list of the
+ indices of its neighbors in other.data.
+ """
+ results = [[] for i in range(self.n)]
+ def traverse_checking(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>r/(1.+eps):
+ return
+ elif rect1.max_distance_rectangle(rect2, p)<r*(1.+eps):
+ traverse_no_checking(node1, node2)
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ d = other.data[node2.idx]
+ for i in node1.idx:
+ results[i] += node2.idx[minkowski_distance(d,self.data[i],p)<=r].tolist()
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1,rect1,node2.less,less)
+ traverse_checking(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse_checking(node1.less,less,node2,rect2)
+ traverse_checking(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1.less,less1,node2.less,less2)
+ traverse_checking(node1.less,less1,node2.greater,greater2)
+ traverse_checking(node1.greater,greater1,node2.less,less2)
+ traverse_checking(node1.greater,greater1,node2.greater,greater2)
+
+ def traverse_no_checking(node1, node2):
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ results[i] += node2.idx.tolist()
+ else:
+ traverse_no_checking(node1, node2.less)
+ traverse_no_checking(node1, node2.greater)
+ else:
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+
+ traverse_checking(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+ return results
+
+ def query_pairs(self, r, p=2., eps=0):
+ """Find all pairs of points whose distance is at most r
+
+ Parameters
+ ==========
+
+ r : positive float
+ The maximum distance
+ p : float 1<=p<=infinity
+ Which Minkowski norm to use
+ eps : nonnegative float
+ Approximate search. Branches of the tree are not explored
+ if their nearest points are further than r/(1+eps), and branches
+ are added in bulk if their furthest points are nearer than r*(1+eps).
+
+ Returns
+ =======
+
+ results : set
+ set of pairs (i,j), i<j, for which the corresponding positions are
+ close.
+
+ """
+ results = set()
+ visited = set()
+ def test_set_visited(node1, node2):
+ i, j = sorted((id(node1),id(node2)))
+ if (i,j) in visited:
+ return True
+ else:
+ visited.add((i,j))
+ return False
+ def traverse_checking(node1, rect1, node2, rect2):
+ if test_set_visited(node1, node2):
+ return
+
+ if id(node2)<id(node1):
+ # This node pair will be visited in the other order
+ #return
+ pass
+
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ d = self.data[node2.idx]
+ for i in node1.idx:
+ for j in node2.idx[minkowski_distance(d,self.data[i],p)<=r]:
+ if i<j:
+ results.add((i,j))
+ elif j<i:
+ results.add((j,i))
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1,rect1,node2.less,less)
+ traverse_checking(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse_checking(node1.less,less,node2,rect2)
+ traverse_checking(node1.greater,greater,node2,rect2)
+ elif rect1.min_distance_rectangle(rect2, p)>r/(1.+eps):
+ return
+ elif rect1.max_distance_rectangle(rect2, p)<r*(1.+eps):
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1.less,less1,node2.less,less2)
+ traverse_checking(node1.less,less1,node2.greater,greater2)
+ traverse_checking(node1.greater,greater1,node2.less,less2)
+ traverse_checking(node1.greater,greater1,node2.greater,greater2)
+
+ def traverse_no_checking(node1, node2):
+ if test_set_visited(node1, node2):
+ return
+
+ if id(node2)<id(node1):
+ # This node pair will be visited in the other order
+ #return
+ pass
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ for j in node2.idx:
+ if i<j:
+ results.add((i,j))
+ elif j<i:
+ results.add((j,i))
+ else:
+ traverse_no_checking(node1, node2.less)
+ traverse_no_checking(node1, node2.greater)
+ else:
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+
+ traverse_checking(self.tree, Rectangle(self.maxes, self.mins),
+ self.tree, Rectangle(self.maxes, self.mins))
+ return results
+
+
+ def count_neighbors(self, other, r, p=2.):
+ """Count how many nearby pairs can be formed.
+
+ Count the number of pairs (x1,x2) that can be formed, with x1 drawn
+ from self and x2 drawn from other, and where distance(x1,x2,p)<=r.
+ This is the "two-point correlation" described in Gray and Moore 2000,
+ "N-body problems in statistical learning", and the code here is based
+ on their algorithm.
+
+ Parameters
+ ==========
+
+ other : KDTree
+ The tree whose points supply x2 (its data, tree, maxes and mins are read).
+ r : float or one-dimensional array of floats
+ The radius to produce a count for. Multiple radii are searched with a single
+ tree traversal.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use
+
+ Returns
+ =======
+
+ result : integer or one-dimensional array of integers
+ The number of pairs. Note that this is internally stored in a numpy int,
+ and so may overflow if very large (two billion).
+ """
+
+ def traverse(node1, rect1, node2, rect2, idx): # idx: positions in r still undecided for this node pair
+ min_r = rect1.min_distance_rectangle(rect2,p)
+ max_r = rect1.max_distance_rectangle(rect2,p)
+ c_greater = r[idx]>max_r # radii larger than the max rectangle-to-rectangle distance
+ result[idx[c_greater]] += node1.children*node2.children # credit the whole node pair at once; presumably children is a point count -- TODO confirm
+ idx = idx[(min_r<=r[idx]) & (r[idx]<=max_r)] # keep only radii lying between min_r and max_r
+ if len(idx)==0:
+ return
+
+ if isinstance(node1,KDTree.leafnode):
+ if isinstance(node2,KDTree.leafnode):
+ ds = minkowski_distance(self.data[node1.idx][:,np.newaxis,:],
+ other.data[node2.idx][np.newaxis,:,:],
+ p).ravel()
+ ds.sort()
+ result[idx] += np.searchsorted(ds,r[idx],side='right') # side='right' counts distances <= r
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1, rect1, node2.less, less, idx)
+ traverse(node1, rect1, node2.greater, greater, idx)
+ else:
+ if isinstance(node2,KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less, less, node2, rect2, idx)
+ traverse(node1.greater, greater, node2, rect2, idx)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2,idx)
+ traverse(node1.less,less1,node2.greater,greater2,idx)
+ traverse(node1.greater,greater1,node2.less,less2,idx)
+ traverse(node1.greater,greater1,node2.greater,greater2,idx)
+ R1 = Rectangle(self.maxes, self.mins)
+ R2 = Rectangle(other.maxes, other.mins)
+ if np.shape(r) == (): # scalar r: run with a one-element array and return a scalar
+ r = np.array([r])
+ result = np.zeros(1,dtype=int)
+ traverse(self.tree, R1, other.tree, R2, np.arange(1))
+ return result[0]
+ elif len(np.shape(r))==1: # one count per radius, in the order given
+ r = np.asarray(r)
+ n, = r.shape
+ result = np.zeros(n,dtype=int)
+ traverse(self.tree, R1, other.tree, R2, np.arange(n))
+ return result
+ else:
+ raise ValueError("r must be either a single value or a one-dimensional array of values")
+
+ def sparse_distance_matrix(self, other, max_distance, p=2.):
+ """Compute a sparse distance matrix
+
+ Computes a distance matrix between two KDTrees, leaving as zero
+ any distance greater than max_distance.
+
+ Parameters
+ ==========
+
+ other : KDTree
+
+ max_distance : positive float
+ p : float; the Minkowski p-norm passed to the distance computations
+ Returns
+ =======
+
+ result : dok_matrix
+ Sparse matrix representing the results in "dictionary of keys" format.
+ """
+ result = scipy.sparse.dok_matrix((self.n,other.n)) # rows index self's points, columns index other's
+
+ def traverse(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>max_distance: # prune: this pair of rectangles is already too far apart
+ return
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ for j in node2.idx:
+ d = minkowski_distance(self.data[i],other.data[j],p)
+ if d<=max_distance: # only distances within max_distance are stored
+ result[i,j] = d
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1,rect1,node2.less,less)
+ traverse(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less,less,node2,rect2)
+ traverse(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2)
+ traverse(node1.less,less1,node2.greater,greater2)
+ traverse(node1.greater,greater1,node2.less,less2)
+ traverse(node1.greater,greater1,node2.greater,greater2)
+ traverse(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+
+ return result
+
+
+def distance_matrix(x,y,p=2,threshold=1000000):
+    """
+    Compute the distance matrix.
+
+    Returns the matrix of all pair-wise distances.
+
+    Parameters
+    ----------
+    x : array_like, `M` by `K`
+        Matrix of `M` vectors in `K` dimensions.
+    y : array_like, `N` by `K`
+        Matrix of `N` vectors in `K` dimensions.
+    p : float, 1 <= p <= infinity
+        Which Minkowski p-norm to use.
+    threshold : positive integer
+        If `M * N * K` > threshold, use a Python loop instead of creating
+        a very large temporary array.
+
+    Returns
+    -------
+    result : array_like, `M` by `N`
+
+    Examples
+    --------
+    >>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]])
+    array([[ 1.        ,  1.41421356],
+           [ 1.41421356,  1.        ]])
+
+    """
+
+    x = np.asarray(x)
+    m, k = x.shape
+    y = np.asarray(y)
+    n, kk = y.shape
+
+    if k != kk:
+        raise ValueError("x contains %d-dimensional vectors but y contains %d-dimensional vectors" % (k, kk))
+
+    if m*n*k <= threshold:
+        return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
+    else:
+        result = np.empty((m,n),dtype=float) # builtin float replaces the removed np.float alias; FIXME: figure out the best dtype
+        if m<n:
+            for i in range(m):
+                result[i,:] = minkowski_distance(x[i],y,p)
+        else:
+            for j in range(n):
+                result[:,j] = minkowski_distance(x,y[j],p)
+        return result
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/setup.py
--- /dev/null
+++ b/yt/utilities/spatial/setup.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+from os.path import join
+
+def configuration(parent_package = '', top_path = None):
+ from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs
+ from numpy.distutils.system_info import get_info
+ from distutils.sysconfig import get_python_inc
+
+ config = Configuration('spatial', parent_package, top_path)
+
+ config.add_data_dir('tests')
+
+# qhull_src = ['geom2.c', 'geom.c', 'global.c', 'io.c', 'libqhull.c',
+# 'mem.c', 'merge.c', 'poly2.c', 'poly.c', 'qset.c',
+# 'random.c', 'rboxlib.c', 'stat.c', 'user.c', 'usermem.c',
+# 'userprintf.c']
+
+# config.add_library('qhull',
+# sources=[join('qhull', 'src', x) for x in qhull_src],
+# include_dirs=[get_python_inc(),
+# get_numpy_include_dirs()],
+# # XXX: GCC dependency!
+# #extra_compiler_args=['-fno-strict-aliasing'],
+# )
+
+# lapack = dict(get_info('lapack_opt'))
+# try:
+# libs = ['qhull'] + lapack.pop('libraries')
+# except KeyError:
+# libs = ['qhull']
+# config.add_extension('qhull',
+# sources=['qhull.c'],
+# libraries=libs,
+# **lapack)
+
+ config.add_extension('ckdtree', sources=['ckdtree.c']) # FIXME: cython
+
+ config.add_extension('_distance_wrap',
+ sources=[join('src', 'distance_wrap.c'), join('src', 'distance.c')],
+ include_dirs = [get_numpy_include_dirs()])
+
+ return config
+
+if __name__ == '__main__':
+ from numpy.distutils.core import setup
+ setup(maintainer = "SciPy Developers",
+ author = "Anne Archibald",
+ maintainer_email = "scipy-dev at scipy.org",
+ description = "Spatial algorithms and data structures",
+ url = "http://www.scipy.org",
+ license = "SciPy License (BSD Style)",
+ **configuration(top_path='').todict()
+ )
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/setupscons.py
--- /dev/null
+++ b/yt/utilities/spatial/setupscons.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from os.path import join
+
+def configuration(parent_package = '', top_path = None):
+ from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs
+ config = Configuration('spatial', parent_package, top_path)
+
+ config.add_data_dir('tests')
+ config.add_sconscript('SConstruct')
+
+ return config
+
+if __name__ == '__main__':
+ from numpy.distutils.core import setup
+ setup(maintainer = "SciPy Developers",
+ author = "Anne Archibald",
+ maintainer_email = "scipy-dev at scipy.org",
+ description = "Spatial algorithms and data structures",
+ url = "http://www.scipy.org",
+ license = "SciPy License (BSD Style)",
+ **configuration(top_path='').todict()
+ )
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/src/common.h
--- /dev/null
+++ b/yt/utilities/spatial/src/common.h
@@ -0,0 +1,70 @@
+/**
+ * common.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved into new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CLUSTER_COMMON_H
+#define _CLUSTER_COMMON_H
+
+#define CPY_MAX(_x, _y) (((_x) > (_y)) ? (_x) : (_y)) /* larger operand; each argument may be evaluated twice */
+#define CPY_MIN(_x, _y) (((_x) < (_y)) ? (_x) : (_y)) /* smaller operand; each argument may be evaluated twice */
+
+#define NCHOOSE2(_n) ((_n)*((_n)-1)/2) /* n*(n-1)/2; _n is parenthesized so expression arguments expand correctly */
+
+#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
+#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
+ CPY_BITS_PER_CHAR))
+#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
+ ((CPY_BITS_PER_CHAR-1) - \
+ ((i) % CPY_BITS_PER_CHAR))) & 0x1)
+#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
+ ((0x1) << ((CPY_BITS_PER_CHAR-1) \
+ -((i) % CPY_BITS_PER_CHAR))))
+#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
+ ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
+ -((i) % CPY_BITS_PER_CHAR))))
+
+#ifndef CPY_CEIL_DIV
+#define CPY_CEIL_DIV(x, y) ((((double)(x))/(double)(y)) == \
+                            ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1)) /* x/y, plus one when the division is inexact; casts now cover the whole argument */
+#endif
+
+
+#ifdef CPY_DEBUG
+#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define CPY_DEBUG_MSG(...)
+#endif
+
+#endif
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/src/distance.c
--- /dev/null
+++ b/yt/utilities/spatial/src/distance.c
@@ -0,0 +1,958 @@
+/**
+ * distance.c
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <Python.h>
+#include <numpy/ndarrayobject.h>
+
+#include <math.h>
+#include <stdlib.h>
+#include "common.h"
+#include "distance.h"
+
+static NPY_INLINE double euclidean_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = u[i] - v[i];
+ s = s + d * d;
+ }
+ return sqrt(s);
+}
+
+static NPY_INLINE double ess_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + d * d;
+ }
+ return s;
+}
+
+static NPY_INLINE double chebyshev_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double d, maxv = 0.0;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ if (d > maxv) {
+ maxv = d;
+ }
+ }
+ return maxv;
+}
+
+static NPY_INLINE double canberra_distance(const double *u, const double *v, int n) {
+ int i;
+ double snum = 0.0, sdenom = 0.0, tot = 0.0;
+ for (i = 0; i < n; i++) {
+ snum = fabs(u[i] - v[i]);
+ sdenom = fabs(u[i]) + fabs(v[i]);
+ if (sdenom > 0.0) {
+ tot += snum / sdenom;
+ }
+ }
+ return tot;
+}
+
+static NPY_INLINE double bray_curtis_distance(const double *u, const double *v, int n) {
+ int i;
+ double s1 = 0.0, s2 = 0.0;
+ for (i = 0; i < n; i++) {
+ s1 += fabs(u[i] - v[i]);
+ s2 += fabs(u[i] + v[i]);
+ }
+ return s1 / s2;
+}
+
+static NPY_INLINE double mahalanobis_distance(const double *u, const double *v,
+ const double *covinv, double *dimbuf1,
+ double *dimbuf2, int n) {
+ int i, j;
+ double s;
+ const double *covrow = covinv;
+ for (i = 0; i < n; i++) {
+ dimbuf1[i] = u[i] - v[i];
+ }
+ for (i = 0; i < n; i++) {
+ covrow = covinv + (i * n);
+ s = 0.0;
+ for (j = 0; j < n; j++) {
+ s += dimbuf1[j] * covrow[j];
+ }
+ dimbuf2[i] = s;
+ }
+ s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += dimbuf1[i] * dimbuf2[i];
+ }
+ return sqrt(s);
+}
+
+double hamming_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s = s + (u[i] != v[i]);
+ }
+ return s / (double)n;
+}
+
+static NPY_INLINE double hamming_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s = s + (u[i] != v[i]);
+ }
+ return s / (double)n;
+}
+
+static NPY_INLINE double yule_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nff = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ nff += (!u[i] && !v[i]);
+ }
+ return (2.0 * ntf * nft) / (double)(ntt * nff + ntf * nft);
+}
+
+static NPY_INLINE double matching_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ }
+ return (double)(ntf + nft) / (double)(n);
+}
+
+static NPY_INLINE double dice_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ }
+ return (double)(nft + ntf) / (double)(2.0 * ntt + ntf + nft);
+}
+
+
+static NPY_INLINE double rogerstanimoto_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nff = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ nff += (!u[i] && !v[i]);
+ }
+ return (2.0 * (ntf + nft)) / ((double)ntt + nff + (2.0 * (ntf + nft)));
+}
+
+static NPY_INLINE double russellrao_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ /** int nff = 0, nft = 0, ntf = 0;**/
+ int ntt = 0;
+ for (i = 0; i < n; i++) {
+ /** nff += (!u[i] && !v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);**/
+ ntt += (u[i] && v[i]);
+ }
+ /** return (double)(ntf + nft + nff) / (double)n;**/
+ return (double) (n - ntt) / (double) n;
+}
+
+static NPY_INLINE double kulsinski_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0, nff = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ nff += (!u[_i] && !v[_i]);
+ }
+ return ((double)(ntf + nft - ntt + n)) / ((double)(ntf + nft + n));
+}
+
+static NPY_INLINE double sokalsneath_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ }
+ return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt);
+}
+
+static NPY_INLINE double sokalmichener_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0, nff = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ nff += (!u[_i] && !v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ }
+ return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt + nff);
+}
+
+static NPY_INLINE double jaccard_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double denom = 0.0, num = 0.0;
+ for (i = 0; i < n; i++) {
+ num += (u[i] != v[i]) && ((u[i] != 0.0) || (v[i] != 0.0));
+ denom += (u[i] != 0.0) || (v[i] != 0.0);
+ }
+ return num / denom;
+}
+
+static NPY_INLINE double jaccard_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ double num = 0.0, denom = 0.0;
+ for (i = 0; i < n; i++) {
+ num += (u[i] != v[i]) && ((u[i] != 0) || (v[i] != 0));
+ denom += (u[i] != 0) || (v[i] != 0);
+ }
+ return num / denom;
+}
+
+static NPY_INLINE double dot_product(const double *u, const double *v, int n) {
+ int i;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += u[i] * v[i];
+ }
+ return s;
+}
+
+static NPY_INLINE double cosine_distance(const double *u, const double *v, int n,
+ const double nu, const double nv) {
+ return 1.0 - (dot_product(u, v, n) / (nu * nv));
+}
+
+static NPY_INLINE double seuclidean_distance(const double *var,
+ const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = u[i] - v[i];
+ s = s + (d * d) / var[i];
+ }
+ return sqrt(s);
+}
+
+static NPY_INLINE double city_block_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + d;
+ }
+ return s;
+}
+
+double minkowski_distance(const double *u, const double *v, int n, double p) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + pow(d, p);
+ }
+ return pow(s, 1.0 / p);
+}
+
+double weighted_minkowski_distance(const double *u, const double *v, int n, double p, const double *w) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]) * w[i];
+ s = s + pow(d, p);
+ }
+ return pow(s, 1.0 / p);
+}
+
+void compute_mean_vector(double *res, const double *X, int m, int n) {
+ int i, j;
+ const double *v;
+ for (i = 0; i < n; i++) {
+ res[i] = 0.0;
+ }
+ for (j = 0; j < m; j++) {
+
+ v = X + (j * n);
+ for (i = 0; i < n; i++) {
+ res[i] += v[i];
+ }
+ }
+ for (i = 0; i < n; i++) {
+ res[i] /= (double)m;
+ }
+}
+
+void pdist_euclidean(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = euclidean_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_mahalanobis(const double *X, const double *covinv,
+ double *dm, int m, int n) { /* writes mahalanobis_distance for every row pair i<j of X into dm */
+ int i, j;
+ const double *u, *v;
+ double *it = dm; /* output cursor; advances once per (i, j) pair */
+ double *dimbuf1, *dimbuf2;
+ dimbuf1 = (double*)malloc(sizeof(double) * 2 * n); /* NOTE(review): result is not checked for NULL before use */
+ dimbuf2 = dimbuf1 + n; /* second half of the same allocation; both halves are scratch space for mahalanobis_distance */
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i); /* row i: X is indexed as m rows of n doubles */
+ v = X + (n * j);
+ *it = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, n);
+ }
+ }
+ dimbuf2 = 0; /* only clears the alias; the single free below releases both halves */
+ free(dimbuf1);
+}
+
+void pdist_bray_curtis(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = bray_curtis_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_canberra(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = canberra_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_hamming(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = hamming_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_hamming_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = hamming_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_jaccard(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = jaccard_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_jaccard_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = jaccard_distance_bool(u, v, n);
+ }
+ }
+}
+
+
+void pdist_chebyshev(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = chebyshev_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = cosine_distance(u, v, n, norms[i], norms[j]);
+ }
+ }
+}
+
+void pdist_seuclidean(const double *X, const double *var,
+ double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = seuclidean_distance(var, u, v, n);
+ }
+ }
+}
+
+void pdist_city_block(const double *X, double *dm, int m, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = city_block_distance(u, v, n);
+ }
+ }
+}
+
+void pdist_minkowski(const double *X, double *dm, int m, int n, double p) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = minkowski_distance(u, v, n, p);
+ }
+ }
+}
+
+void pdist_weighted_minkowski(const double *X, double *dm, int m, int n, double p, const double *w) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = weighted_minkowski_distance(u, v, n, p, w);
+ }
+ }
+}
+
+void pdist_yule_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = yule_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_matching_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = matching_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_dice_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = dice_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = rogerstanimoto_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_russellrao_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = russellrao_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_kulsinski_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = kulsinski_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = sokalsneath_distance_bool(u, v, n);
+ }
+ }
+}
+
+void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < m; i++) {
+ for (j = i + 1; j < m; j++, it++) {
+ u = X + (n * i);
+ v = X + (n * j);
+ *it = sokalmichener_distance_bool(u, v, n);
+ }
+ }
+}
+
+void dist_to_squareform_from_vector(double *M, const double *v, int n) {
+ double *it;
+ const double *cit;
+ int i, j;
+ cit = v;
+ for (i = 0; i < n - 1; i++) {
+ it = M + (i * n) + i + 1;
+ for (j = i + 1; j < n; j++, it++, cit++) {
+ *it = *cit;
+ }
+ }
+}
+
+void dist_to_vector_from_squareform(const double *M, double *v, int n) {
+ double *it;
+ const double *cit;
+ int i, j;
+ it = v;
+ for (i = 0; i < n - 1; i++) {
+ cit = M + (i * n) + i + 1;
+ for (j = i + 1; j < n; j++, it++, cit++) {
+ *it = *cit;
+ }
+ }
+}
+
+
+/** cdist */
+
+void cdist_euclidean(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = euclidean_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_mahalanobis(const double *XA,
+ const double *XB,
+ const double *covinv,
+ double *dm, int mA, int mB, int n) { /* writes mahalanobis_distance for every (row of XA, row of XB) pair into dm */
+ int i, j;
+ const double *u, *v;
+ double *it = dm; /* output cursor; fills dm with mB values for each of the mA rows of XA */
+ double *dimbuf1, *dimbuf2;
+ dimbuf1 = (double*)malloc(sizeof(double) * 2 * n); /* NOTE(review): result is not checked for NULL before use */
+ dimbuf2 = dimbuf1 + n; /* second half of the same allocation; both halves are scratch space for mahalanobis_distance */
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i); /* row i of XA, indexed as rows of n doubles */
+ v = XB + (n * j); /* row j of XB, indexed as rows of n doubles */
+ *it = mahalanobis_distance(u, v, covinv, dimbuf1, dimbuf2, n);
+ }
+ }
+ dimbuf2 = 0; /* only clears the alias; the single free below releases both halves */
+ free(dimbuf1);
+}
+
+void cdist_bray_curtis(const double *XA, const double *XB,
+ double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = bray_curtis_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_canberra(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = canberra_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_hamming(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = hamming_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_hamming_bool(const char *XA,
+ const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = hamming_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_jaccard(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = jaccard_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_jaccard_bool(const char *XA,
+ const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = jaccard_distance_bool(u, v, n);
+ }
+ }
+}
+
+
+void cdist_chebyshev(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = chebyshev_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_cosine(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n,
+ const double *normsA, const double *normsB) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = cosine_distance(u, v, n, normsA[i], normsB[j]);
+ }
+ }
+}
+
+void cdist_seuclidean(const double *XA,
+ const double *XB,
+ const double *var,
+ double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = seuclidean_distance(var, u, v, n);
+ }
+ }
+}
+
+void cdist_city_block(const double *XA, const double *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = city_block_distance(u, v, n);
+ }
+ }
+}
+
+void cdist_minkowski(const double *XA, const double *XB, double *dm, int mA, int mB, int n, double p) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = minkowski_distance(u, v, n, p);
+ }
+ }
+}
+
+void cdist_weighted_minkowski(const double *XA, const double *XB, double *dm, int mA, int mB, int n, double p, const double *w) {
+ int i, j;
+ const double *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = weighted_minkowski_distance(u, v, n, p, w);
+ }
+ }
+}
+
+void cdist_yule_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = yule_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_matching_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = matching_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_dice_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = dice_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_rogerstanimoto_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = rogerstanimoto_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_russellrao_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = russellrao_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_kulsinski_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = kulsinski_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_sokalsneath_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = sokalsneath_distance_bool(u, v, n);
+ }
+ }
+}
+
+void cdist_sokalmichener_bool(const char *XA, const char *XB, double *dm, int mA, int mB, int n) {
+ int i, j;
+ const char *u, *v;
+ double *it = dm;
+ for (i = 0; i < mA; i++) {
+ for (j = 0; j < mB; j++, it++) {
+ u = XA + (n * i);
+ v = XB + (n * j);
+ *it = sokalmichener_distance_bool(u, v, n);
+ }
+ }
+}
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/src/distance.h
--- /dev/null
+++ b/yt/utilities/spatial/src/distance.h
@@ -0,0 +1,116 @@
+/**
+ * distance.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CPY_DISTANCE_H
+#define _CPY_DISTANCE_H
+
+void dist_to_squareform_from_vector(double *M, const double *v, int n);
+void dist_to_vector_from_squareform(const double *M, double *v, int n);
+void pdist_euclidean(const double *X, double *dm, int m, int n);
+void pdist_seuclidean(const double *X,
+ const double *var, double *dm, int m, int n);
+void pdist_mahalanobis(const double *X, const double *covinv,
+ double *dm, int m, int n);
+void pdist_bray_curtis(const double *X, double *dm, int m, int n);
+void pdist_canberra(const double *X, double *dm, int m, int n);
+void pdist_hamming(const double *X, double *dm, int m, int n);
+void pdist_hamming_bool(const char *X, double *dm, int m, int n);
+void pdist_city_block(const double *X, double *dm, int m, int n);
+void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms);
+void pdist_chebyshev(const double *X, double *dm, int m, int n);
+void pdist_jaccard(const double *X, double *dm, int m, int n);
+void pdist_jaccard_bool(const char *X, double *dm, int m, int n);
+void pdist_kulsinski_bool(const char *X, double *dm, int m, int n);
+void pdist_minkowski(const double *X, double *dm, int m, int n, double p);
+void pdist_weighted_minkowski(const double *X, double *dm, int m, int n, double p, const double *w);
+void pdist_yule_bool(const char *X, double *dm, int m, int n);
+void pdist_matching_bool(const char *X, double *dm, int m, int n);
+void pdist_dice_bool(const char *X, double *dm, int m, int n);
+void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n);
+void pdist_russellrao_bool(const char *X, double *dm, int m, int n);
+void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n);
+void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n);
+
+void cdist_euclidean(const double *XA, const double *XB, double *dm, int mA, int mB, int n);
+void cdist_mahalanobis(const double *XA, const double *XB,
+ const double *covinv,
+ double *dm, int mA, int mB, int n);
+void cdist_bray_curtis(const double *XA, const double *XB,
+ double *dm, int mA, int mB, int n);
+void cdist_canberra(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_hamming(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_hamming_bool(const char *XA,
+ const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_jaccard(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_jaccard_bool(const char *XA,
+ const char *XB, double *dm, int mA, int mB, int n);
+void cdist_chebyshev(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_cosine(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n,
+ const double *normsA, const double *normsB);
+void cdist_seuclidean(const double *XA,
+ const double *XB,
+ const double *var,
+ double *dm, int mA, int mB, int n);
+void cdist_city_block(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_minkowski(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n, double p);
+void cdist_weighted_minkowski(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n, double p, const double *w);
+void cdist_yule_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_matching_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_dice_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_rogerstanimoto_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_russellrao_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_kulsinski_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_sokalsneath_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_sokalmichener_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+
+#endif
diff -r 0eb936dbc3431bb5fce2e42aba1796d3ad38d1ef -r 1e5955fdd14d398a1521f78680b3232b32236dda yt/utilities/spatial/src/distance_wrap.c
--- /dev/null
+++ b/yt/utilities/spatial/src/distance_wrap.c
@@ -0,0 +1,1163 @@
+/**
+ * distance_wrap.c
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Copyright (c) 2007, Damian Eads. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include "distance.h"
+#include "Python.h"
+#include <numpy/arrayobject.h>
+#include <stdio.h>
+
+extern PyObject *cdist_euclidean_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_euclidean(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_euclidean(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_canberra_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_canberra(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_canberra(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_bray_curtis_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_bray_curtis(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_bray_curtis(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *cdist_mahalanobis_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_mahalanobis(); args tuple: (XA, XB, covinv, dm). */
+ PyArrayObject *XA_, *XB_, *covinv_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ const double *covinv;
+ if (!PyArg_ParseTuple(args, "O!O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &covinv_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ covinv = (const double*)covinv_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes (including covinv) are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_mahalanobis(XA, XB, covinv, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *cdist_chebyshev_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_chebyshev(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_chebyshev(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *cdist_cosine_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_cosine(); args tuple: (XA, XB, dm, normsA, normsB). */
+ PyArrayObject *XA_, *XB_, *dm_, *normsA_, *normsB_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB, *normsA, *normsB;
+ if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_,
+ &PyArray_Type, &normsA_,
+ &PyArray_Type, &normsB_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ normsA = (const double*)normsA_->data;
+ normsB = (const double*)normsB_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes (including the norms) are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_cosine(XA, XB, dm, mA, mB, n, normsA, normsB);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_seuclidean_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_seuclidean(); args tuple: (XA, XB, var, dm) - note var precedes dm. */
+ PyArrayObject *XA_, *XB_, *dm_, *var_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB, *var;
+ if (!PyArg_ParseTuple(args, "O!O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &var_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ var = (double*)var_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes (including var) are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_seuclidean(XA, XB, var, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_city_block_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_city_block(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_city_block(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_hamming_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_hamming(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_hamming(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_hamming_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_hamming_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_hamming_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_jaccard_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_jaccard(); args tuple: (XA, XB, dm). */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_jaccard(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_jaccard_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_jaccard_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_jaccard_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_minkowski_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_minkowski(); args tuple: (XA, XB, dm, p) with p a float. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!O!d",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_,
+ &p)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1]; /* NOTE(review): shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_minkowski(XA, XB, dm, mA, mB, n, p);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_weighted_minkowski_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_weighted_minkowski(); args tuple: (XA, XB, dm, p, w). */
+ PyArrayObject *XA_, *XB_, *dm_, *w_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const double *XA, *XB, *w;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!O!dO!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_,
+ &p,
+ &PyArray_Type, &w_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const double*)XA_->data;
+ XB = (const double*)XB_->data;
+ w = (const double*)w_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1]; /* NOTE(review): shapes/dtypes (including w) are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_weighted_minkowski(XA, XB, dm, mA, mB, n, p, w);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *cdist_yule_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_yule_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_yule_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_matching_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_matching_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_matching_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_dice_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_dice_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_dice_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_rogerstanimoto_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_rogerstanimoto_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_rogerstanimoto_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_russellrao_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_russellrao_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_russellrao_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_kulsinski_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_kulsinski_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_kulsinski_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_sokalmichener_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_sokalmichener_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_sokalmichener_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *cdist_sokalsneath_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for cdist_sokalsneath_bool(); args tuple: (XA, XB, dm); XA/XB data is read as char. */
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n; /* rows of XA, rows of XB, columns of XA */
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ cdist_sokalsneath_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+/***************************** pdist ***/
+
+extern PyObject *pdist_euclidean_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_euclidean(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_euclidean(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_canberra_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_canberra(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_canberra(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_bray_curtis_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_bray_curtis(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_bray_curtis(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *pdist_mahalanobis_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_mahalanobis(); args tuple: (X, covinv, dm). */
+ PyArrayObject *X_, *covinv_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ const double *covinv;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &covinv_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ covinv = (const double*)covinv_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes (including covinv) are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_mahalanobis(X, covinv, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *pdist_chebyshev_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_chebyshev(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_chebyshev(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *pdist_cosine_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_cosine(); args tuple: (X, dm, norms). */
+ PyArrayObject *X_, *dm_, *norms_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X, *norms;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &PyArray_Type, &norms_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ norms = (const double*)norms_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes (including norms) are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_cosine(X, dm, m, n, norms);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_seuclidean_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_seuclidean(); args tuple: (X, var, dm) - note var precedes dm. */
+ PyArrayObject *X_, *dm_, *var_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X, *var;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &var_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ var = (double*)var_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes (including var) are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_seuclidean(X, var, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_city_block_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_city_block(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_city_block(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_hamming_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_hamming(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_hamming(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_hamming_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_hamming_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_hamming_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_jaccard_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_jaccard(); args tuple: (X, dm). */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_jaccard(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_jaccard_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_jaccard_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_jaccard_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_minkowski_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_minkowski(); args tuple: (X, dm, p) with p a float. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm, *X;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!d",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &p)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_minkowski(X, dm, m, n, p);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+extern PyObject *pdist_weighted_minkowski_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_weighted_minkowski(); args tuple: (X, dm, p, w). */
+    PyArrayObject *X_, *dm_, *w_;
+    int m, n; /* rows and columns of X */
+    double *dm;
+    const double *X, *w; /* read-only inputs, matching the const parameters declared in distance.h */
+    double p;
+    if (!PyArg_ParseTuple(args, "O!O!dO!",
+                          &PyArray_Type, &X_,
+                          &PyArray_Type, &dm_,
+                          &p,
+                          &PyArray_Type, &w_)) {
+        return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+    }
+    /* w used to be a plain double*, so assigning the const-cast pointer dropped the qualifier. */
+    X = (const double*)X_->data;
+    dm = (double*)dm_->data;
+    w = (const double*)w_->data;
+    m = X_->dimensions[0];
+    n = X_->dimensions[1];
+    /* NOTE(review): array shapes/dtypes (including w) are not checked here - presumably validated by the Python caller; confirm. */
+    pdist_weighted_minkowski(X, dm, m, n, p, w);
+    return Py_BuildValue("d", 0.0); /* always returns the float 0.0, never the distances */
+}
+
+
+extern PyObject *pdist_yule_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_yule_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_yule_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_matching_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_matching_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_matching_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_dice_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_dice_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_dice_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_rogerstanimoto_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_rogerstanimoto_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_rogerstanimoto_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_russellrao_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_russellrao_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_russellrao_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_kulsinski_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_kulsinski_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_kulsinski_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_sokalmichener_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_sokalmichener_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_sokalmichener_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *pdist_sokalsneath_bool_wrap(PyObject *self, PyObject *args) { /* Python binding for pdist_sokalsneath_bool(); args tuple: (X, dm); X data is read as char. */
+ PyArrayObject *X_, *dm_;
+ int m, n; /* rows and columns of X */
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+ /* NOTE(review): array shapes/dtypes are not checked here - presumably validated by the Python caller; confirm. */
+ pdist_sokalsneath_bool(X, dm, m, n);
+ }
+ return Py_BuildValue(""); /* empty format string: returns None */
+}
+
+extern PyObject *to_squareform_from_vector_wrap(PyObject *self, PyObject *args) { /* Python binding for dist_to_squareform_from_vector(); args tuple: (M, v), M is the writable one. */
+ PyArrayObject *M_, *v_;
+ int n; /* first dimension of M */
+ const double *v;
+ double *M;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &M_,
+ &PyArray_Type, &v_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ M = (double*)M_->data;
+ v = (const double*)v_->data;
+ n = M_->dimensions[0]; /* NOTE(review): shapes/dtypes of M and v are not checked here - presumably validated by the Python caller; confirm. */
+ dist_to_squareform_from_vector(M, v, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0 */
+}
+
+extern PyObject *to_vector_from_squareform_wrap(PyObject *self, PyObject *args) { /* Python binding for dist_to_vector_from_squareform(); args tuple: (M, v), v is the writable one. */
+ PyArrayObject *M_, *v_;
+ int n; /* first dimension of M */
+ double *v;
+ const double *M;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &M_,
+ &PyArray_Type, &v_)) {
+ return 0; /* bad arguments: exception already set by PyArg_ParseTuple */
+ }
+ else {
+ M = (const double*)M_->data;
+ v = (double*)v_->data;
+ n = M_->dimensions[0]; /* NOTE(review): shapes/dtypes of M and v are not checked here - presumably validated by the Python caller; confirm. */
+ dist_to_vector_from_squareform(M, v, n);
+ }
+ return Py_BuildValue("d", 0.0); /* always returns the float 0.0 */
+}
+
+
+static PyMethodDef _distanceWrapMethods[] = { /* Python-visible name -> C wrapper; every entry takes a positional argument tuple (METH_VARARGS) and has no docstring. */
+ {"cdist_bray_curtis_wrap", cdist_bray_curtis_wrap, METH_VARARGS},
+ {"cdist_canberra_wrap", cdist_canberra_wrap, METH_VARARGS},
+ {"cdist_chebyshev_wrap", cdist_chebyshev_wrap, METH_VARARGS},
+ {"cdist_city_block_wrap", cdist_city_block_wrap, METH_VARARGS},
+ {"cdist_cosine_wrap", cdist_cosine_wrap, METH_VARARGS},
+ {"cdist_dice_bool_wrap", cdist_dice_bool_wrap, METH_VARARGS},
+ {"cdist_euclidean_wrap", cdist_euclidean_wrap, METH_VARARGS},
+ {"cdist_hamming_wrap", cdist_hamming_wrap, METH_VARARGS},
+ {"cdist_hamming_bool_wrap", cdist_hamming_bool_wrap, METH_VARARGS},
+ {"cdist_jaccard_wrap", cdist_jaccard_wrap, METH_VARARGS},
+ {"cdist_jaccard_bool_wrap", cdist_jaccard_bool_wrap, METH_VARARGS},
+ {"cdist_kulsinski_bool_wrap", cdist_kulsinski_bool_wrap, METH_VARARGS},
+ {"cdist_mahalanobis_wrap", cdist_mahalanobis_wrap, METH_VARARGS},
+ {"cdist_matching_bool_wrap", cdist_matching_bool_wrap, METH_VARARGS},
+ {"cdist_minkowski_wrap", cdist_minkowski_wrap, METH_VARARGS},
+ {"cdist_weighted_minkowski_wrap", cdist_weighted_minkowski_wrap, METH_VARARGS},
+ {"cdist_rogerstanimoto_bool_wrap", cdist_rogerstanimoto_bool_wrap, METH_VARARGS},
+ {"cdist_russellrao_bool_wrap", cdist_russellrao_bool_wrap, METH_VARARGS},
+ {"cdist_seuclidean_wrap", cdist_seuclidean_wrap, METH_VARARGS},
+ {"cdist_sokalmichener_bool_wrap", cdist_sokalmichener_bool_wrap, METH_VARARGS},
+ {"cdist_sokalsneath_bool_wrap", cdist_sokalsneath_bool_wrap, METH_VARARGS},
+ {"cdist_yule_bool_wrap", cdist_yule_bool_wrap, METH_VARARGS},
+ {"pdist_bray_curtis_wrap", pdist_bray_curtis_wrap, METH_VARARGS},
+ {"pdist_canberra_wrap", pdist_canberra_wrap, METH_VARARGS},
+ {"pdist_chebyshev_wrap", pdist_chebyshev_wrap, METH_VARARGS},
+ {"pdist_city_block_wrap", pdist_city_block_wrap, METH_VARARGS},
+ {"pdist_cosine_wrap", pdist_cosine_wrap, METH_VARARGS},
+ {"pdist_dice_bool_wrap", pdist_dice_bool_wrap, METH_VARARGS},
+ {"pdist_euclidean_wrap", pdist_euclidean_wrap, METH_VARARGS},
+ {"pdist_hamming_wrap", pdist_hamming_wrap, METH_VARARGS},
+ {"pdist_hamming_bool_wrap", pdist_hamming_bool_wrap, METH_VARARGS},
+ {"pdist_jaccard_wrap", pdist_jaccard_wrap, METH_VARARGS},
+ {"pdist_jaccard_bool_wrap", pdist_jaccard_bool_wrap, METH_VARARGS},
+ {"pdist_kulsinski_bool_wrap", pdist_kulsinski_bool_wrap, METH_VARARGS},
+ {"pdist_mahalanobis_wrap", pdist_mahalanobis_wrap, METH_VARARGS},
+ {"pdist_matching_bool_wrap", pdist_matching_bool_wrap, METH_VARARGS},
+ {"pdist_minkowski_wrap", pdist_minkowski_wrap, METH_VARARGS},
+ {"pdist_weighted_minkowski_wrap", pdist_weighted_minkowski_wrap, METH_VARARGS},
+ {"pdist_rogerstanimoto_bool_wrap", pdist_rogerstanimoto_bool_wrap, METH_VARARGS},
+ {"pdist_russellrao_bool_wrap", pdist_russellrao_bool_wrap, METH_VARARGS},
+ {"pdist_seuclidean_wrap", pdist_seuclidean_wrap, METH_VARARGS},
+ {"pdist_sokalmichener_bool_wrap", pdist_sokalmichener_bool_wrap, METH_VARARGS},
+ {"pdist_sokalsneath_bool_wrap", pdist_sokalsneath_bool_wrap, METH_VARARGS},
+ {"pdist_yule_bool_wrap", pdist_yule_bool_wrap, METH_VARARGS},
+ {"to_squareform_from_vector_wrap",
+ to_squareform_from_vector_wrap, METH_VARARGS},
+ {"to_vector_from_squareform_wrap",
+ to_vector_from_squareform_wrap, METH_VARARGS},
+ {NULL, NULL} /* Sentinel - marks the end of this structure */
+};
+
+#if PY_VERSION_HEX >= 0x03000000
+static struct PyModuleDef moduledef = { /* Python 3 module definition handed to PyModule_Create() */
+ PyModuleDef_HEAD_INIT,
+ "_distance_wrap", /* module name */
+ NULL, /* no module docstring */
+ -1, /* m_size of -1; see the PyModuleDef documentation for its meaning */
+ _distanceWrapMethods, /* method table defined above */
+ NULL, /* remaining optional slots are left unset */
+ NULL,
+ NULL,
+ NULL
+};
+
+PyMODINIT_FUNC PyInit__distance_wrap(void) /* Python 3 entry point; PyMODINIT_FUNC supplies the export/linkage decoration a bare PyObject* lacks */
+{
+    PyObject *m = PyModule_Create(&moduledef);
+    if (m == NULL) {
+        return NULL; /* creation failed: do not run import_array() with an exception pending */
+    }
+    import_array(); /* NumPy C-API setup; on failure the macro returns NULL from this function */
+    return m;
+}
+#else
+PyMODINIT_FUNC init_distance_wrap(void) /* Python 2 entry point (compiled when PY_VERSION_HEX < 0x03000000) */
+{
+ (void) Py_InitModule("_distance_wrap", _distanceWrapMethods); /* registers the method table; the returned module object is deliberately discarded */
+ import_array(); // Must be present for NumPy. Called first after above line.
+}
+#endif
https://bitbucket.org/yt_analysis/yt/changeset/148e930ce903/
changeset: 148e930ce903
branch: yt
user: sskory
date: 2011-10-29 18:17:12
summary: Initial go at a boolean hybrid data container. Not completely working yet,
but much progress has been made.
affected #: 1 file
diff -r 1e5955fdd14d398a1521f78680b3232b32236dda -r 148e930ce903bbdb38b6269e34052676f5dd3deb yt/data_objects/data_containers.py
--- a/yt/data_objects/data_containers.py
+++ b/yt/data_objects/data_containers.py
@@ -62,6 +62,16 @@
NeedsProperty, \
NeedsParameter
+def force_array(item, shape):
+ try:
+ sh = item.shape
+ return item
+ except AttributeError:
+ if item:
+ return na.ones(shape, dtype='bool')
+ else:
+ return na.zeros(shape, dtype='bool')
+
def restore_grid_state(func):
"""
A decorator that takes a function with the API of (self, grid, field)
@@ -3401,6 +3411,140 @@
def flush_data(self, *args, **kwargs):
raise KeyError("Can't do this")
+class AMRBooleanRegionBase(AMR3DData):
+ """
+ A hybrid region built by boolean comparison between
+ existing regions.
+ """
+ _type_name = "boolean"
+ _con_args = {"regions"}
+ def __init__(self, regions, fields = None, pf = None, **kwargs):
+ """
+ This will build a hybrid region based on the boolean logic
+ of the regions.
+
+ Parameters
+ ----------
+ regions : list
+ A list of region objects and strings describing the boolean logic
+ to use when building the hybrid region. The boolean logic can be
+ nested using parentheses.
+
+ Examples
+ --------
+ >>> re1 = pf.h.region([0.5, 0.5, 0.5], [0.4, 0.4, 0.4],
+ [0.6, 0.6, 0.6])
+ >>> re2 = pf.h.region([0.5, 0.5, 0.5], [0.45, 0.45, 0.45],
+ [0.55, 0.55, 0.55])
+ >>> sp1 = pf.h.sphere([0.575, 0.575, 0.575], .03)
+ >>> toroid_shape = pf.h.boolean([re1, "NOT", re2])
+ >>> toroid_shape_with_hole = pf.h.boolean([re1, "NOT", "(", re2, "OR",
+ sp1, ")"])
+ """
+ # Center is meaningless, but we'll define it all the same.
+ AMR3DData.__init__(self, [0.5]*3, fields, pf, **kwargs)
+ self.regions = regions
+ self._all_regions = []
+ self._some_overlap = []
+ self._all_overlap = []
+ self._cut_masks = {}
+ self._get_all_regions()
+ self._make_overlaps()
+
+ def _get_all_regions(self):
+ # Before anything, we simply find out which regions are involved in all
+ # of this process, uniquely.
+ for item in self.regions:
+ if isinstance(item, types.StringType): continue
+ self._all_regions.append(item)
+ self._all_regions = na.unique(self._all_regions)
+
+ def _make_overlaps(self):
+ # Using the processed cut_masks, we'll figure out what grids
+ # are left in the hybrid region.
+ for region in self._all_regions:
+ region._get_list_of_grids()
+ for grid in region._grids:
+ if grid in self._some_overlap or grid in self._all_overlap:
+ continue
+ # Get the cut_mask for this grid in this region, and see
+ # if there's any overlap with the overall cut_mask.
+ overall = self._get_cut_mask(grid)
+ local = force_array(region._get_cut_mask(grid),
+ grid.ActiveDimensions)
+ if (local == na.bitwise_and(overall, local)).all():
+ # All of local is in overall
+ self._all_overlap.append(grid)
+ continue
+ if (overall == local).any():
+ # Some of local is in overall
+ self._some_overlap.append(grid)
+ continue
+
+ def _is_fully_enclosed(self, grid):
+ return (grid in self._all_overlap)
+
+ def _get_list_of_grids(self):
+ self._grids = na.array(self._some_overlap + self._all_overlap,
+ dtype='object')
+
+ def _get_cut_mask(self, grid, field=None):
+ if self._is_fully_enclosed(grid):
+ return True # We do not want child masking here
+ if not isinstance(grid, (FakeGridForParticles, GridChildMaskWrapper)) \
+ and grid.id in self._cut_masks:
+ return self._cut_masks[grid.id]
+ # If we get this far, we have to generate the cut_mask.
+ return self._get_level_mask(self.regions, grid)
+
+ def _get_level_mask(self, ops, grid):
+ level_masks = []
+ end = 0
+ for i, item in enumerate(ops):
+ if end > 0 and i < end:
+ # We skip over things inside parentheses on this level.
+ continue
+ if isinstance(item, AMRData):
+ # Add this regions cut_mask to level_masks
+ level_masks.append(force_array(item._get_cut_mask(grid),
+ grid.ActiveDimensions))
+ elif item == "AND" or item == "NOT" or item == "OR":
+ level_masks.append(item)
+ elif item == "(":
+ # recurse down, and we'll append the results, which
+ # should be a single cut_mask
+ open_count = 0
+ for ii, item in enumerate(ops[i + 1:]):
+ # We look for the matching closing parentheses to find
+ # where we slice ops.
+ if item == "(":
+ open_count += 1
+ if item == ")" and open_count > 0:
+ open_count -= 1
+ elif item == ")" and open_count == 0:
+ end = i + ii + 1
+ break
+ level_masks.append(force_array(self._get_level_mask(ops[i + 1:end],
+ grid), grid.ActiveDimensions))
+ # Now we do the logic on our level_mask.
+ # There should be no nested logic anymore.
+ # The first item should be a cut_mask,
+ # so that will be our starting point.
+ this_cut_mask = level_masks[0]
+ for i, item in enumerate(level_masks):
+ # I could use a slice above, but I'll keep i consistent instead.
+ if i == 0: continue
+ if item == "AND":
+ # So, the next item in level_masks we want to AND.
+ na.bitwise_and(this_cut_mask, level_masks[i+1], this_cut_mask)
+ if item == "NOT":
+ # It's convenient to remember that NOT == AND NOT
+ na.bitwise_and(this_cut_mask, na.invert(level_masks[i+1]),
+ this_cut_mask)
+ if item == "OR":
+ na.bitwise_or(this_cut_mask, level_masks[i+1], this_cut_mask)
+ return this_cut_mask
+
def _reconstruct_object(*args, **kwargs):
pfid = args[0]
https://bitbucket.org/yt_analysis/yt/changeset/04d0a3952d81/
changeset: 04d0a3952d81
branch: yt
user: sskory
date: 2011-10-31 16:33:13
summary: Merge.
affected #: 16 files
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/analysis_modules/halo_finding/halo_objects.py
--- a/yt/analysis_modules/halo_finding/halo_objects.py
+++ b/yt/analysis_modules/halo_finding/halo_objects.py
@@ -1376,7 +1376,8 @@
["ParticleMassMsun", "particle_index"]
def __init__(self, data_source, padding, num_neighbors, bounds, total_mass,
- period, threshold=160.0, dm_only=True, rearrange=True, premerge=True):
+ period, threshold=160.0, dm_only=True, rearrange=True, premerge=True,
+ tree = 'F'):
"""
Run hop on *data_source* with a given density *threshold*. If
*dm_only* is set, only run it on the dark matter particles, otherwise
@@ -1393,6 +1394,7 @@
self.period = na.array([1.]*3)
self._data_source = data_source
self.premerge = premerge
+ self.tree = tree
mylog.info("Initializing HOP")
HaloList.__init__(self, data_source, dm_only)
@@ -1421,7 +1423,8 @@
obj = ParallelHOPHaloFinder(self.period, self.padding,
self.num_neighbors, self.bounds,
self.particle_fields,
- self.threshold, rearrange=self.rearrange, premerge=self.premerge)
+ self.threshold, rearrange=self.rearrange, premerge=self.premerge,
+ tree = self.tree)
self.densities, self.tags = obj.density, obj.chainID
# I'm going to go ahead and delete self.densities because it's not
# actually being used. I'm not going to remove it altogether because
@@ -1780,7 +1783,7 @@
def __init__(self, pf, subvolume=None,threshold=160, dm_only=True, \
resize=True, rearrange=True,\
fancy_padding=True, safety=1.5, premerge=True, sample=0.03, \
- total_mass=None, num_particles=None):
+ total_mass=None, num_particles=None, tree = 'F'):
r"""Parallel HOP halo finder.
Halos are built by:
@@ -1810,9 +1813,16 @@
Default = False.
resize : bool
Turns load-balancing on or off. Default = True.
+ tree : string
+ Chooses which kD Tree to use. The Fortran one (tree = 'F') is
+ faster, but uses more memory. The Cython one (tree = 'C') is
+ slower but is more memory efficient.
+ Default = 'F'
rearrange : bool
Turns on faster nearest neighbor searches at the cost of increased
- memory usage. Default = True.
+ memory usage.
+ This option only applies when using the Fortran tree.
+ Default = True.
fancy_padding : bool
True calculates padding independently for each face of each
subvolume. Default = True.
@@ -1862,6 +1872,9 @@
self.num_neighbors = 65
self.safety = safety
self.sample = sample
+ self.tree = tree
+ if self.tree != 'F' and self.tree != 'C':
+ mylog.error("No kD Tree specified!")
period = pf.domain_right_edge - pf.domain_left_edge
topbounds = na.array([[0., 0., 0.], period])
# Cut up the volume evenly initially, with no padding.
@@ -1969,7 +1982,8 @@
(LE_padding, RE_padding) = self.padding
parallelHOPHaloList.__init__(self, self._data_source, self.padding, \
self.num_neighbors, self.bounds, total_mass, period, \
- threshold=threshold, dm_only=dm_only, rearrange=rearrange, premerge=premerge)
+ threshold=threshold, dm_only=dm_only, rearrange=rearrange, premerge=premerge,
+ tree = self.tree)
self._join_halolists()
yt_counters("Final Grouping")
@@ -2120,6 +2134,7 @@
mass in the entire volume.
Default = None, which means the total mass is automatically
calculated.
+
Examples
--------
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/analysis_modules/halo_finding/parallel_hop/parallel_hop_interface.py
--- a/yt/analysis_modules/halo_finding/parallel_hop/parallel_hop_interface.py
+++ b/yt/analysis_modules/halo_finding/parallel_hop/parallel_hop_interface.py
@@ -38,6 +38,8 @@
except ImportError:
mylog.debug("The Fortran kD-Tree did not import correctly.")
+from yt.utilities.spatial import cKDTree
+
from yt.utilities.parallel_tools.parallel_analysis_interface import \
parallel_blocking_call, \
ParallelAnalysisInterface
@@ -45,7 +47,7 @@
class ParallelHOPHaloFinder(ParallelAnalysisInterface):
def __init__(self,period, padding, num_neighbors, bounds,
particle_fields, threshold=160.0, rearrange=True,
- premerge=True):
+ premerge=True, tree='F'):
ParallelAnalysisInterface.__init__(self)
self.threshold = threshold
self.rearrange = rearrange
@@ -64,6 +66,7 @@
self.mass = particle_fields.pop("ParticleMassMsun")
self.padded_particles = []
self.nMerge = 4
+ self.tree = tree
yt_counters("chainHOP")
self.max_mem = 0
self.__max_memory()
@@ -342,34 +345,50 @@
Set up the data objects that get passed to the kD-tree code.
"""
yt_counters("init kd tree")
- # Yes, we really do need to initialize this many arrays.
- # They're deleted in _parallelHOP.
- fKD.dens = na.zeros(self.size, dtype='float64', order='F')
- fKD.mass = na.concatenate((self.mass, self.mass_pad))
- del self.mass
- fKD.pos = na.empty((3, self.size), dtype='float64', order='F')
- # This actually copies the data into the fortran space.
- self.psize = self.xpos.size
- fKD.pos[0, :self.psize] = self.xpos
- fKD.pos[1, :self.psize] = self.ypos
- fKD.pos[2, :self.psize] = self.zpos
- del self.xpos, self.ypos, self.zpos
- gc.collect()
- fKD.pos[0, self.psize:] = self.xpos_pad
- fKD.pos[1, self.psize:] = self.ypos_pad
- fKD.pos[2, self.psize:] = self.zpos_pad
- del self.xpos_pad, self.ypos_pad, self.zpos_pad
- gc.collect()
- fKD.qv = na.asfortranarray(na.empty(3, dtype='float64'))
- fKD.nn = self.num_neighbors
- # Plus 2 because we're looking for that neighbor, but only keeping
- # nMerge + 1 neighbor tags, skipping ourselves.
- fKD.nMerge = self.nMerge + 2
- fKD.nparts = self.size
- fKD.sort = True # Slower, but needed in _connect_chains
- fKD.rearrange = self.rearrange # True is faster, but uses more memory
- # Now call the fortran.
- create_tree(0)
+ if self.tree == 'F':
+ # Yes, we really do need to initialize this many arrays.
+ # They're deleted in _parallelHOP.
+ fKD.dens = na.zeros(self.size, dtype='float64', order='F')
+ fKD.mass = na.concatenate((self.mass, self.mass_pad))
+ del self.mass
+ fKD.pos = na.empty((3, self.size), dtype='float64', order='F')
+ # This actually copies the data into the fortran space.
+ self.psize = self.xpos.size
+ fKD.pos[0, :self.psize] = self.xpos
+ fKD.pos[1, :self.psize] = self.ypos
+ fKD.pos[2, :self.psize] = self.zpos
+ del self.xpos, self.ypos, self.zpos
+ gc.collect()
+ fKD.pos[0, self.psize:] = self.xpos_pad
+ fKD.pos[1, self.psize:] = self.ypos_pad
+ fKD.pos[2, self.psize:] = self.zpos_pad
+ del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ gc.collect()
+ fKD.qv = na.asfortranarray(na.empty(3, dtype='float64'))
+ fKD.nn = self.num_neighbors
+ # Plus 2 because we're looking for that neighbor, but only keeping
+ # nMerge + 1 neighbor tags, skipping ourselves.
+ fKD.nMerge = self.nMerge + 2
+ fKD.nparts = self.size
+ fKD.sort = True # Slower, but needed in _connect_chains
+ fKD.rearrange = self.rearrange # True is faster, but uses more memory
+ # Now call the fortran.
+ create_tree(0)
+ elif self.tree == 'C':
+ self.mass = na.concatenate((self.mass, self.mass_pad))
+ self.pos = na.empty((self.size, 3), dtype='float64')
+ self.psize = self.xpos.size
+ self.pos[:self.psize, 0] = self.xpos
+ self.pos[:self.psize, 1] = self.ypos
+ self.pos[:self.psize, 2] = self.zpos
+ del self.xpos, self.ypos, self.zpos
+ gc.collect()
+ self.pos[self.psize:, 0] = self.xpos_pad
+ self.pos[self.psize:, 1] = self.ypos_pad
+ self.pos[self.psize:, 2] = self.zpos_pad
+ del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ gc.collect()
+ self.kdtree = cKDTree(self.pos, leafsize = 32)
self.__max_memory()
yt_counters("init kd tree")
@@ -395,8 +414,12 @@
self.is_inside = ( (points >= LE).all(axis=1) * \
(points < RE).all(axis=1) )
elif round == 'second':
- self.is_inside = ( (fKD.pos.T >= LE).all(axis=1) * \
- (fKD.pos.T < RE).all(axis=1) )
+ if self.tree == 'F':
+ self.is_inside = ( (fKD.pos.T >= LE).all(axis=1) * \
+ (fKD.pos.T < RE).all(axis=1) )
+ elif self.tree == 'C':
+ self.is_inside = ( (self.pos > LE).all(axis=1) * \
+ (self.pos < RE).all(axis=1) )
# Below we find out which particles are in the `annulus', one padding
# distance inside the boundaries. First we find the particles outside
# this inner boundary.
@@ -406,8 +429,12 @@
inner = na.invert( (points >= temp_LE).all(axis=1) * \
(points < temp_RE).all(axis=1) )
elif round == 'second' or round == 'third':
- inner = na.invert( (fKD.pos.T >= temp_LE).all(axis=1) * \
- (fKD.pos.T < temp_RE).all(axis=1) )
+ if self.tree == 'F':
+ inner = na.invert( (fKD.pos.T >= temp_LE).all(axis=1) * \
+ (fKD.pos.T < temp_RE).all(axis=1) )
+ elif self.tree == 'C':
+ inner = na.invert( (self.pos >= temp_LE).all(axis=1) * \
+ (self.pos < temp_RE).all(axis=1) )
if round == 'first':
del points
# After inverting the logic above, we want points that are both
@@ -444,26 +471,44 @@
self.densestNN = na.empty(self.size,dtype='int64')
# We find nearest neighbors in chunks.
chunksize = 10000
- fKD.chunk_tags = na.asfortranarray(na.empty((self.num_neighbors, chunksize), dtype='int64'))
- start = 1 # Fortran counting!
- finish = 0
- while finish < self.size:
- finish = min(finish+chunksize,self.size)
- # Call the fortran. start and finish refer to the data locations
- # in fKD.pos, and specify the range of particles to find nearest
- # neighbors
- fKD.start = start
- fKD.finish = finish
- find_chunk_nearest_neighbors()
- chunk_NNtags = (fKD.chunk_tags[:,:finish-start+1] - 1).transpose()
- # Find the densest nearest neighbors by referencing the already
- # calculated density.
- n_dens = na.take(self.density,chunk_NNtags)
- max_loc = na.argmax(n_dens,axis=1)
- for i in xrange(finish - start + 1): # +1 for fortran counting.
- j = start + i - 1 # -1 for fortran counting.
- self.densestNN[j] = chunk_NNtags[i,max_loc[i]]
- start = finish + 1
+ if self.tree == 'F':
+ fKD.chunk_tags = na.asfortranarray(na.empty((self.num_neighbors, chunksize), dtype='int64'))
+ start = 1 # Fortran counting!
+ finish = 0
+ while finish < self.size:
+ finish = min(finish+chunksize,self.size)
+ # Call the fortran. start and finish refer to the data locations
+ # in fKD.pos, and specify the range of particles to find nearest
+ # neighbors
+ fKD.start = start
+ fKD.finish = finish
+ find_chunk_nearest_neighbors()
+ chunk_NNtags = (fKD.chunk_tags[:,:finish-start+1] - 1).transpose()
+ # Find the densest nearest neighbors by referencing the already
+ # calculated density.
+ n_dens = na.take(self.density,chunk_NNtags)
+ max_loc = na.argmax(n_dens,axis=1)
+ for i in xrange(finish - start + 1): # +1 for fortran counting.
+ j = start + i - 1 # -1 for fortran counting.
+ self.densestNN[j] = chunk_NNtags[i,max_loc[i]]
+ start = finish + 1
+ elif self.tree == 'C':
+ start = 0
+ finish = 0
+ while finish < self.size - 1:
+ finish = min(finish+chunksize, self.size)
+ # Unlike above, this function returns a new chunk_NNtags
+ # that is the right size every time. But this may not actually
+ # be as memory efficient - fragmenting?
+ chunk_NNtags = self.kdtree.find_chunk_nearest_neighbors(start, \
+ finish, num_neighbors=self.num_neighbors)
+ n_dens = na.take(self.density, chunk_NNtags)
+ max_loc = na.argmax(n_dens, axis=1)
+ max_loc = na.argmax(n_dens,axis=1)
+ for i in xrange(finish - start):
+ j = start + i
+ self.densestNN[j] = chunk_NNtags[i,max_loc[i]]
+ start = finish
yt_counters("densestNN")
self.__max_memory()
del chunk_NNtags, max_loc, n_dens
@@ -568,12 +613,15 @@
chain_map = defaultdict(set)
for i in xrange(max(self.chainID)+1):
chain_map[i].add(i)
- # Plus 2 because we're looking for that neighbor, but only keeping
- # nMerge + 1 neighbor tags, skipping ourselves.
- fKD.dist = na.empty(self.nMerge+2, dtype='float64')
- fKD.tags = na.empty(self.nMerge+2, dtype='int64')
- # We can change this here to make the searches faster.
- fKD.nn = self.nMerge+2
+ if self.tree == 'F':
+ # Plus 2 because we're looking for that neighbor, but only keeping
+ # nMerge + 1 neighbor tags, skipping ourselves.
+ fKD.dist = na.empty(self.nMerge+2, dtype='float64')
+ fKD.tags = na.empty(self.nMerge+2, dtype='int64')
+ # We can change this here to make the searches faster.
+ fKD.nn = self.nMerge + 2
+ elif self.tree == 'C':
+ nn = self.nMerge + 2
yt_counters("preconnect kd tree search.")
for i in xrange(self.size):
# Don't consider this particle if it's not part of a chain.
@@ -586,9 +634,13 @@
# We're only connecting >= peakthresh chains now.
if part_max_dens < self.peakthresh: continue
# Loop over nMerge closest nearest neighbors.
- fKD.qv = fKD.pos[:, i]
- find_nn_nearest_neighbors()
- NNtags = fKD.tags[:] - 1
+ if self.tree == 'F':
+ fKD.qv = fKD.pos[:, i]
+ find_nn_nearest_neighbors()
+ NNtags = fKD.tags[:] - 1
+ elif self.tree == 'C':
+ qv = self.pos[i, :]
+ NNtags = self.kdtree.query(qv, nn)[1]
same_count = 0
for j in xrange(int(self.nMerge+1)):
thisNN = NNtags[j+1] # Don't consider ourselves at NNtags[0]
@@ -1002,10 +1054,13 @@
self.chain_densest_n = {} # chainID -> {chainIDs->boundary dens}
# Plus 2 because we're looking for that neighbor, but only keeping
# nMerge + 1 neighbor tags, skipping ourselves.
- fKD.dist = na.empty(self.nMerge+2, dtype='float64')
- fKD.tags = na.empty(self.nMerge+2, dtype='int64')
- # We can change this here to make the searches faster.
- fKD.nn = self.nMerge+2
+ if self.tree == 'F':
+ fKD.dist = na.empty(self.nMerge+2, dtype='float64')
+ fKD.tags = na.empty(self.nMerge+2, dtype='int64')
+ # We can change this here to make the searches faster.
+ fKD.nn = self.nMerge+2
+ elif self.tree == 'C':
+ nn = self.nMerge + 2
for i in xrange(int(self.size)):
# Don't consider this particle if it's not part of a chain.
if self.chainID[i] < 0: continue
@@ -1018,9 +1073,13 @@
# Make sure we're skipping deleted chains.
if part_max_dens == -1.0: continue
# Loop over nMerge closest nearest neighbors.
- fKD.qv = fKD.pos[:, i]
- find_nn_nearest_neighbors()
- NNtags = fKD.tags[:] - 1
+ if self.tree == 'F':
+ fKD.qv = fKD.pos[:, i]
+ find_nn_nearest_neighbors()
+ NNtags = fKD.tags[:] - 1
+ elif self.tree == 'C':
+ qv = self.pos[i, :]
+ NNtags = self.kdtree.query(qv, nn)[1]
for j in xrange(int(self.nMerge+1)):
thisNN = NNtags[j+1] # Don't consider ourselves at NNtags[0]
thisNN_chainID = self.chainID[thisNN]
@@ -1345,11 +1404,14 @@
select = (self.chainID != -1)
calc = len(na.where(select == True)[0])
loc = na.empty((calc, 3), dtype='float64')
- loc[:, 0] = na.concatenate((self.xpos, self.xpos_pad))[select]
- loc[:, 1] = na.concatenate((self.ypos, self.ypos_pad))[select]
- loc[:, 2] = na.concatenate((self.zpos, self.zpos_pad))[select]
- self.__max_memory()
- del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ if self.tree == 'F':
+ loc[:, 0] = na.concatenate((self.xpos, self.xpos_pad))[select]
+ loc[:, 1] = na.concatenate((self.ypos, self.ypos_pad))[select]
+ loc[:, 2] = na.concatenate((self.zpos, self.zpos_pad))[select]
+ self.__max_memory()
+ del self.xpos_pad, self.ypos_pad, self.zpos_pad
+ elif self.tree == 'C':
+ loc = self.pos[select]
subchain = self.chainID[select]
# First we need to find the maximum density point for all groups.
# I think this will be faster than several vector operations that need
@@ -1470,10 +1532,17 @@
# Loop over the particles to find NN for each.
mylog.info('Finding nearest neighbors/density...')
yt_counters("chainHOP_tags_dens")
- chainHOP_tags_dens()
+ if self.tree == 'F':
+ chainHOP_tags_dens()
+ elif self.tree == 'C':
+ self.density = self.kdtree.chainHOP_get_dens(self.mass, \
+ num_neighbors = self.num_neighbors, nMerge = self.nMerge + 2)
yt_counters("chainHOP_tags_dens")
- self.density = fKD.dens.copy()
- # Now each particle has NNtags, and a local self density.
+ if self.tree == 'F':
+ self.density = fKD.dens.copy()
+ elif self.tree == 'C':
+ pass
+ # Now each particle has a local self density.
# Let's find densest NN
mylog.info('Finding densest nearest neighbors...')
self._densestNN()
@@ -1496,17 +1565,22 @@
self._communicate_annulus_chainIDs()
mylog.info('Connecting %d chains into groups...' % self.nchains)
self._connect_chains()
- self.mass = fKD.mass[:self.psize]
- self.mass_pad = fKD.mass[self.psize:]
- del fKD.dens, fKD.mass, fKD.dens
- self.xpos = fKD.pos[0, :self.psize]
- self.ypos = fKD.pos[1, :self.psize]
- self.zpos = fKD.pos[2, :self.psize]
- self.xpos_pad = fKD.pos[0, self.psize:]
- self.ypos_pad = fKD.pos[1, self.psize:]
- self.zpos_pad = fKD.pos[2, self.psize:]
- del fKD.pos, fKD.chunk_tags
- free_tree(0) # Frees the kdtree object.
+ if self.tree == 'F':
+ self.mass = fKD.mass[:self.psize]
+ self.mass_pad = fKD.mass[self.psize:]
+ del fKD.dens, fKD.mass, fKD.dens
+ self.xpos = fKD.pos[0, :self.psize]
+ self.ypos = fKD.pos[1, :self.psize]
+ self.zpos = fKD.pos[2, :self.psize]
+ self.xpos_pad = fKD.pos[0, self.psize:]
+ self.ypos_pad = fKD.pos[1, self.psize:]
+ self.zpos_pad = fKD.pos[2, self.psize:]
+ del fKD.pos, fKD.chunk_tags
+ free_tree(0) # Frees the kdtree object.
+ gc.collect()
+ elif self.tree == 'C':
+ del self.kdtree
+ gc.collect()
del self.densestNN
mylog.info('Communicating group links globally...')
self._make_global_chain_densest_n()
@@ -1530,7 +1604,10 @@
for groupID in self.I_own[taskID]:
self.halo_taskmap[groupID].add(taskID)
del self.I_own
- del self.xpos, self.ypos, self.zpos
+ if self.tree == 'F':
+ del self.xpos, self.ypos, self.zpos
+ elif self.tree == 'C':
+ pass
def __add_to_array(self, arr, key, value, type):
"""
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/data_objects/data_containers.py
--- a/yt/data_objects/data_containers.py
+++ b/yt/data_objects/data_containers.py
@@ -62,6 +62,16 @@
NeedsProperty, \
NeedsParameter
+def force_array(item, shape):
+ try:
+ sh = item.shape
+ return item
+ except AttributeError:
+ if item:
+ return na.ones(shape, dtype='bool')
+ else:
+ return na.zeros(shape, dtype='bool')
+
def restore_grid_state(func):
"""
A decorator that takes a function with the API of (self, grid, field)
@@ -3404,6 +3414,140 @@
def flush_data(self, *args, **kwargs):
raise KeyError("Can't do this")
+class AMRBooleanRegionBase(AMR3DData):
+ """
+ A hybrid region built by boolean comparison between
+ existing regions.
+ """
+ _type_name = "boolean"
+ _con_args = {"regions"}
+ def __init__(self, regions, fields = None, pf = None, **kwargs):
+ """
+ This will build a hybrid region based on the boolean logic
+ of the regions.
+
+ Parameters
+ ----------
+ regions : list
+ A list of region objects and strings describing the boolean logic
+ to use when building the hybrid region. The boolean logic can be
+ nested using parentheses.
+
+ Examples
+ --------
+ >>> re1 = pf.h.region([0.5, 0.5, 0.5], [0.4, 0.4, 0.4],
+ [0.6, 0.6, 0.6])
+ >>> re2 = pf.h.region([0.5, 0.5, 0.5], [0.45, 0.45, 0.45],
+ [0.55, 0.55, 0.55])
+ >>> sp1 = pf.h.sphere([0.575, 0.575, 0.575], .03)
+ >>> toroid_shape = pf.h.boolean([re1, "NOT", re2])
+ >>> toroid_shape_with_hole = pf.h.boolean([re1, "NOT", "(", re2, "OR",
+ sp1, ")"])
+ """
+ # Center is meaningless, but we'll define it all the same.
+ AMR3DData.__init__(self, [0.5]*3, fields, pf, **kwargs)
+ self.regions = regions
+ self._all_regions = []
+ self._some_overlap = []
+ self._all_overlap = []
+ self._cut_masks = {}
+ self._get_all_regions()
+ self._make_overlaps()
+
+ def _get_all_regions(self):
+ # Before anything, we simply find out which regions are involved in all
+ # of this process, uniquely.
+ for item in self.regions:
+ if isinstance(item, types.StringType): continue
+ self._all_regions.append(item)
+ self._all_regions = na.unique(self._all_regions)
+
+ def _make_overlaps(self):
+ # Using the processed cut_masks, we'll figure out what grids
+ # are left in the hybrid region.
+ for region in self._all_regions:
+ region._get_list_of_grids()
+ for grid in region._grids:
+ if grid in self._some_overlap or grid in self._all_overlap:
+ continue
+ # Get the cut_mask for this grid in this region, and see
+ # if there's any overlap with the overall cut_mask.
+ overall = self._get_cut_mask(grid)
+ local = force_array(region._get_cut_mask(grid),
+ grid.ActiveDimensions)
+ if (local == na.bitwise_and(overall, local)).all():
+ # All of local is in overall
+ self._all_overlap.append(grid)
+ continue
+ if (overall == local).any():
+ # Some of local is in overall
+ self._some_overlap.append(grid)
+ continue
+
+ def _is_fully_enclosed(self, grid):
+ return (grid in self._all_overlap)
+
+ def _get_list_of_grids(self):
+ self._grids = na.array(self._some_overlap + self._all_overlap,
+ dtype='object')
+
+ def _get_cut_mask(self, grid, field=None):
+ if self._is_fully_enclosed(grid):
+ return True # We do not want child masking here
+ if not isinstance(grid, (FakeGridForParticles, GridChildMaskWrapper)) \
+ and grid.id in self._cut_masks:
+ return self._cut_masks[grid.id]
+ # If we get this far, we have to generate the cut_mask.
+ return self._get_level_mask(self.regions, grid)
+
+ def _get_level_mask(self, ops, grid):
+ level_masks = []
+ end = 0
+ for i, item in enumerate(ops):
+ if end > 0 and i < end:
+ # We skip over things inside parentheses on this level.
+ continue
+ if isinstance(item, AMRData):
+ # Add this regions cut_mask to level_masks
+ level_masks.append(force_array(item._get_cut_mask(grid),
+ grid.ActiveDimensions))
+ elif item == "AND" or item == "NOT" or item == "OR":
+ level_masks.append(item)
+ elif item == "(":
+ # recurse down, and we'll append the results, which
+ # should be a single cut_mask
+ open_count = 0
+ for ii, item in enumerate(ops[i + 1:]):
+ # We look for the matching closing parentheses to find
+ # where we slice ops.
+ if item == "(":
+ open_count += 1
+ if item == ")" and open_count > 0:
+ open_count -= 1
+ elif item == ")" and open_count == 0:
+ end = i + ii + 1
+ break
+ level_masks.append(force_array(self._get_level_mask(ops[i + 1:end],
+ grid), grid.ActiveDimensions))
+ # Now we do the logic on our level_mask.
+ # There should be no nested logic anymore.
+ # The first item should be a cut_mask,
+ # so that will be our starting point.
+ this_cut_mask = level_masks[0]
+ for i, item in enumerate(level_masks):
+ # I could use a slice above, but I'll keep i consistent instead.
+ if i == 0: continue
+ if item == "AND":
+ # So, the next item in level_masks we want to AND.
+ na.bitwise_and(this_cut_mask, level_masks[i+1], this_cut_mask)
+ if item == "NOT":
+ # It's convenient to remember that NOT == AND NOT
+ na.bitwise_and(this_cut_mask, na.invert(level_masks[i+1]),
+ this_cut_mask)
+ if item == "OR":
+ na.bitwise_or(this_cut_mask, level_masks[i+1], this_cut_mask)
+ return this_cut_mask
+
def _reconstruct_object(*args, **kwargs):
pfid = args[0]
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/setup.py
--- a/yt/utilities/setup.py
+++ b/yt/utilities/setup.py
@@ -148,6 +148,8 @@
config.add_subpackage("delaunay") # From SciPy, written by Robert Kern
config.add_subpackage("kdtree")
config.add_data_files(('kdtree', ['kdtree/fKDpy.so',]))
+ config.add_extension('spatial', ["yt/utilities/spatial/ckdtree.pyx"],
+ libraries=["m"])
config.add_subpackage("parallel_tools")
config.add_extension("data_point_utilities",
"yt/utilities/data_point_utilities.c", libraries=["m"])
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/README
--- /dev/null
+++ b/yt/utilities/spatial/README
@@ -0,0 +1,35 @@
+Stephen Skory
+s at skory.us
+October 2011
+
+This directory is a modified version of the same directory that is part of
+the scipy.spatial package. It has been modified by me in the following
+ways:
+
+- In ckdtree.pyx, distances and searches over the
+ tree both take periodic boundary
+ conditions into account.
+
+- In ckdtree.pyx, all input and output arrays now
+ use 64-bit types: long and double.
+
+- In ckdtree.pyx, I've added two functions specifically for parallel HOP,
+ chainHOP_get_dens and find_chunk_nearest_neighbors.
+
+- In kdtree.py, I've commented out 'import scipy.sparse',
+ which means that any kdtree functionality that uses sparse
+ will not work. This is to avoid needing to build the rest
+ of scipy, which is a challenge and not necessary for just
+ the kdtree.
+
+- I've removed all of the qhull source and functionality.
+
+- I've removed the 'tests' directory.
+
+- I've removed anything having to do with Bento, the
+ python package manager.
+
+Anything that has been removed can be found in the original scipy
+source distribution.
+
+
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/__init__.py
--- /dev/null
+++ b/yt/utilities/spatial/__init__.py
@@ -0,0 +1,34 @@
+"""
+=============================================================
+Spatial algorithms and data structures (:mod:`scipy.spatial`)
+=============================================================
+
+NOTE: this package only imports ``kdtree``, ``ckdtree`` and ``distance``;
+the ``qhull`` import below is commented out.
+
+Nearest-neighbor queries:
+
+.. autosummary::
+   :toctree: generated/
+
+   KDTree      -- class for efficient nearest-neighbor queries
+   cKDTree     -- class for efficient nearest-neighbor queries (faster impl.)
+   distance    -- module containing many different distance measures
+
+Delaunay triangulation (presumably unavailable here while the ``qhull``
+import stays disabled -- TODO confirm):
+
+.. autosummary::
+   :toctree: generated/
+
+   Delaunay
+   tsearch
+
+"""
+
+from kdtree import *
+from ckdtree import *
+#from qhull import *
+
+# Export every public (non-underscore) name present at this point, i.e. what
+# the star imports above brought in.
+__all__ = filter(lambda s:not s.startswith('_'),dir())
+# 'distance' is imported only after __all__ is computed, so add it by hand.
+__all__ += ['distance']
+
+import distance
+from numpy.testing import Tester
+test = Tester().test
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/ckdtree.pyx
--- /dev/null
+++ b/yt/utilities/spatial/ckdtree.pyx
@@ -0,0 +1,758 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import numpy as np
+cimport numpy as np
+cimport stdlib
+cimport cython
+
+import kdtree
+
+cdef double infinity = np.inf
+
+__all__ = ['cKDTree']
+
+
+# priority queue
+cdef union heapcontents:
+ int intdata
+ char* ptrdata
+
+cdef struct heapitem:
+ double priority
+ heapcontents contents
+
+cdef struct heap:
+ int n
+ heapitem* heap
+ int space
+
+cdef inline heapcreate(heap* self,int initial_size):
+ self.space = initial_size
+ self.heap = <heapitem*>stdlib.malloc(sizeof(heapitem)*self.space)
+ self.n=0
+
+cdef inline heapdestroy(heap* self):
+ stdlib.free(self.heap)
+
+cdef inline heapresize(heap* self, int new_space):
+ if new_space<self.n:
+ raise ValueError("Heap containing %d items cannot be resized to %d" % (self.n, new_space))
+ self.space = new_space
+ self.heap = <heapitem*>stdlib.realloc(<void*>self.heap,new_space*sizeof(heapitem))
+
+cdef inline heappush(heap* self, heapitem item):
+ cdef int i
+ cdef heapitem t
+
+ self.n += 1
+ if self.n>self.space:
+ heapresize(self,2*self.space+1)
+
+ i = self.n-1
+ self.heap[i] = item
+ while i>0 and self.heap[i].priority<self.heap[(i-1)//2].priority:
+ t = self.heap[(i-1)//2]
+ self.heap[(i-1)//2] = self.heap[i]
+ self.heap[i] = t
+ i = (i-1)//2
+
+cdef heapitem heappeek(heap* self):
+ return self.heap[0]
+
+cdef heapremove(heap* self):
+ cdef heapitem t
+ cdef int i, j, k, l
+
+ self.heap[0] = self.heap[self.n-1]
+ self.n -= 1
+ if self.n < self.space//4 and self.space>40: #FIXME: magic number
+ heapresize(self,self.space//2+1)
+
+ i=0
+ j=1
+ k=2
+ while ((j<self.n and
+ self.heap[i].priority > self.heap[j].priority or
+ k<self.n and
+ self.heap[i].priority > self.heap[k].priority)):
+ if k<self.n and self.heap[j].priority>self.heap[k].priority:
+ l = k
+ else:
+ l = j
+ t = self.heap[l]
+ self.heap[l] = self.heap[i]
+ self.heap[i] = t
+ i = l
+ j = 2*i+1
+ k = 2*i+2
+
+cdef heapitem heappop(heap* self):
+ cdef heapitem it
+ it = heappeek(self)
+ heapremove(self)
+ return it
+
+
+
+
+
+# utility functions
+cdef inline double dmax(double x, double y):
+ if x>y:
+ return x
+ else:
+ return y
+cdef inline double dabs(double x):
+ if x>0:
+ return x
+ else:
+ return -x
+cdef inline double dmin(double x, double y):
+ if x<y:
+ return x
+ else:
+ return y
+cdef inline double _distance_p(double*x,double*y,double p,int k,double upperbound,
+ double*period):
+ """Compute the distance between x and y
+
+ Computes the Minkowski p-distance to the power p between two points.
+ If the distance**p is larger than upperbound, then any number larger
+ than upperbound may be returned (the calculation is truncated).
+
+ Periodicity added by S. Skory.
+ """
+ cdef int i
+ cdef double r, m
+ r = 0
+ if p==infinity:
+ for i in range(k):
+ m = dmin(dabs(x[i] - y[i]), period[i] - dabs(x[i] - y[i]))
+ r = dmax(r,m)
+ if r>upperbound:
+ return r
+ elif p==1:
+ for i in range(k):
+ m = dmin(dabs(x[i] - y[i]), period[i] - dabs(x[i] - y[i]))
+ r += m
+ if r>upperbound:
+ return r
+ else:
+ for i in range(k):
+ m = dmin(dabs(x[i] - y[i]), period[i] - dabs(x[i] - y[i]))
+ r += m**p
+ if r>upperbound:
+ return r
+ return r
+
+
+
+# Tree structure
+cdef struct innernode:
+ int split_dim
+ int n_points
+ double split
+ double* maxes
+ double* mins
+ innernode* less
+ innernode* greater
+cdef struct leafnode:
+ int split_dim
+ int n_points
+ int start_idx
+ int end_idx
+ double* maxes
+ double* mins
+
+# this is the standard trick for variable-size arrays:
+# malloc sizeof(nodeinfo)+self.m*sizeof(double) bytes.
+cdef struct nodeinfo:
+ innernode* node
+ double side_distances[0]
+
+cdef class cKDTree:
+ """kd-tree for quick nearest-neighbor lookup
+
+ This class provides an index into a set of k-dimensional points
+ which can be used to rapidly look up the nearest neighbors of any
+ point.
+
+ The algorithm used is described in Maneewongvatana and Mount 1999.
+ The general idea is that the kd-tree is a binary trie, each of whose
+ nodes represents an axis-aligned hyperrectangle. Each node specifies
+ an axis and splits the set of points based on whether their coordinate
+ along that axis is greater than or less than a particular value.
+
+ During construction, the axis and splitting point are chosen by the
+ "sliding midpoint" rule, which ensures that the cells do not all
+ become long and thin.
+
+ The tree can be queried for the r closest neighbors of any given point
+ (optionally returning only those within some maximum distance of the
+ point). It can also be queried, with a substantial gain in efficiency,
+ for the r approximate closest neighbors.
+
+ For large dimensions (20 is already large) do not expect this to run
+ significantly faster than brute force. High-dimensional nearest-neighbor
+ queries are a substantial open problem in computer science.
+
+ Parameters
+ ----------
+ data : array-like, shape (n,m)
+ The n data points of dimension m to be indexed. This array is
+ not copied unless this is necessary to produce a contiguous
+ array of doubles, and so modifying this data will result in
+ bogus results.
+ leafsize : positive integer
+ The number of points at which the algorithm switches over to
+ brute-force.
+
+ """
+
+ cdef innernode* tree
+ cdef readonly object data
+ cdef double* raw_data
+ cdef readonly int n, m
+ cdef readonly int leafsize
+ cdef readonly object maxes
+ cdef double* raw_maxes
+ cdef readonly object mins
+ cdef double* raw_mins
+ cdef object indices
+ cdef np.int64_t* raw_indices
+ def __init__(cKDTree self, data, int leafsize=10):
+ cdef np.ndarray[double, ndim=2] inner_data
+ cdef np.ndarray[double, ndim=1] inner_maxes
+ cdef np.ndarray[double, ndim=1] inner_mins
+ cdef np.ndarray[np.int64_t, ndim=1] inner_indices
+ self.data = np.ascontiguousarray(data,dtype=np.double)
+ self.n, self.m = np.shape(self.data)
+ self.leafsize = leafsize
+ if self.leafsize<1:
+ raise ValueError("leafsize must be at least 1")
+ self.maxes = np.ascontiguousarray(np.amax(self.data,axis=0))
+ self.mins = np.ascontiguousarray(np.amin(self.data,axis=0))
+ self.indices = np.ascontiguousarray(np.arange(self.n,dtype=np.int64))
+
+ inner_data = self.data
+ self.raw_data = <double*>inner_data.data
+ inner_maxes = self.maxes
+ self.raw_maxes = <double*>inner_maxes.data
+ inner_mins = self.mins
+ self.raw_mins = <double*>inner_mins.data
+ inner_indices = self.indices
+ self.raw_indices = <np.int64_t*>inner_indices.data
+
+ self.tree = self.__build(0, self.n, self.raw_maxes, self.raw_mins)
+
+ cdef innernode* __build(cKDTree self, int start_idx, int end_idx, double* maxes, double* mins):
+     # Recursively build the subtree over raw_indices[start_idx:end_idx]; maxes/mins
+     # are the bounds of that cell, and every node (inner or leaf) stores its own copy.
+     cdef leafnode* n
+     cdef innernode* ni
+     cdef int i, j, t, p, q, d
+     cdef double size, split, minval, maxval
+     cdef double*mids
+     if end_idx-start_idx<=self.leafsize:
+         n = <leafnode*>stdlib.malloc(sizeof(leafnode))
+         # Skory: per-node bounds, read by the periodic distance test in __query
+         n.maxes = <double*>stdlib.malloc(sizeof(double)*self.m)
+         n.mins = <double*>stdlib.malloc(sizeof(double)*self.m)
+         for i in range(self.m):
+             n.maxes[i] = maxes[i]
+             n.mins[i] = mins[i]
+         n.split_dim = -1  # -1 marks a leaf
+         n.start_idx = start_idx
+         n.end_idx = end_idx
+         return <innernode*>n
+     else:
+         # split along the dimension with the largest extent
+         d = 0
+         size = 0
+         for i in range(self.m):
+             if maxes[i]-mins[i] > size:
+                 d = i
+                 size = maxes[i]-mins[i]
+         maxval = maxes[d]
+         minval = mins[d]
+         if maxval==minval:
+             # all points are identical; the leaf still needs bounds (__free_tree frees them, __query reads them)
+             n = <leafnode*>stdlib.malloc(sizeof(leafnode))
+             n.maxes = <double*>stdlib.malloc(sizeof(double)*self.m)
+             n.mins = <double*>stdlib.malloc(sizeof(double)*self.m)
+             for i in range(self.m):
+                 n.maxes[i] = maxes[i]
+                 n.mins[i] = mins[i]
+             n.split_dim = -1
+             n.start_idx = start_idx
+             n.end_idx = end_idx
+             return <innernode*>n
+         split = (maxval+minval)/2
+         # partition the index range in place: coordinates < split first, >= split last
+         p = start_idx
+         q = end_idx-1
+         while p<=q:
+             if self.raw_data[self.raw_indices[p]*self.m+d]<split:
+                 p+=1
+             elif self.raw_data[self.raw_indices[q]*self.m+d]>=split:
+                 q-=1
+             else:
+                 t = self.raw_indices[p]
+                 self.raw_indices[p] = self.raw_indices[q]
+                 self.raw_indices[q] = t
+                 p+=1
+                 q-=1
+
+         # slide midpoint if necessary
+         if p==start_idx:
+             # no points less than split
+             j = start_idx
+             split = self.raw_data[self.raw_indices[j]*self.m+d]
+             for i in range(start_idx+1, end_idx):
+                 if self.raw_data[self.raw_indices[i]*self.m+d]<split:
+                     j = i
+                     split = self.raw_data[self.raw_indices[j]*self.m+d]
+             t = self.raw_indices[start_idx]
+             self.raw_indices[start_idx] = self.raw_indices[j]
+             self.raw_indices[j] = t
+             p = start_idx+1
+             q = start_idx
+         elif p==end_idx:
+             # no points greater than split
+             j = end_idx-1
+             split = self.raw_data[self.raw_indices[j]*self.m+d]
+             for i in range(start_idx, end_idx-1):
+                 if self.raw_data[self.raw_indices[i]*self.m+d]>split:
+                     j = i
+                     split = self.raw_data[self.raw_indices[j]*self.m+d]
+             t = self.raw_indices[end_idx-1]
+             self.raw_indices[end_idx-1] = self.raw_indices[j]
+             self.raw_indices[j] = t
+             p = end_idx-1
+             q = end_idx-2
+
+         # construct new node representation
+         ni = <innernode*>stdlib.malloc(sizeof(innernode))
+         mids = <double*>stdlib.malloc(sizeof(double)*self.m)
+         for i in range(self.m):
+             mids[i] = maxes[i]
+         mids[d] = split  # upper bound of the "less" child along d
+         ni.less = self.__build(start_idx,p,mids,mins)
+         for i in range(self.m):
+             mids[i] = mins[i]
+         mids[d] = split  # lower bound of the "greater" child along d
+         ni.greater = self.__build(p,end_idx,maxes,mids)
+         stdlib.free(mids)
+         ni.split_dim = d
+         ni.split = split
+         # Skory: inner nodes keep a copy of their bounds as well
+         ni.maxes = <double*>stdlib.malloc(sizeof(double)*self.m)
+         ni.mins = <double*>stdlib.malloc(sizeof(double)*self.m)
+         for i in range(self.m):
+             ni.maxes[i] = maxes[i]
+             ni.mins[i] = mins[i]
+         return ni
+
+ cdef __free_tree(cKDTree self, innernode* node):
+ if node.split_dim!=-1:
+ self.__free_tree(node.less)
+ self.__free_tree(node.greater)
+ stdlib.free(node.maxes) # Skory
+ stdlib.free(node.mins)
+ stdlib.free(node)
+
+ def __dealloc__(cKDTree self):
+     # Free the C node tree. Test the pointer itself against NULL: an <int> cast can
+     # truncate a 64-bit address. The pointer is NULL only if __init__ never built the tree.
+     if self.tree != NULL:
+         self.__free_tree(self.tree)
+
+ cdef void __query(cKDTree self,
+ double*result_distances,
+ long*result_indices,
+ double*x,
+ int k,
+ double eps,
+ double p,
+ double distance_upper_bound,
+ double*period):
+ cdef heap q
+ cdef heap neighbors
+
+ cdef int i, j
+ cdef double t
+ cdef nodeinfo* inf
+ cdef nodeinfo* inf2
+ cdef double d
+ cdef double m_left, m_right, m
+ cdef double epsfac
+ cdef double min_distance
+ cdef double far_min_distance
+ cdef heapitem it, it2, neighbor
+ cdef leafnode* node
+ cdef innernode* inode
+ cdef innernode* near
+ cdef innernode* far
+ cdef double* side_distances
+
+ # priority queue for chasing nodes
+ # entries are:
+ # minimum distance between the cell and the target
+ # distances between the nearest side of the cell and the target
+ # the head node of the cell
+ heapcreate(&q,12)
+
+ # priority queue for the nearest neighbors
+ # furthest known neighbor first
+ # entries are (-distance**p, i)
+ heapcreate(&neighbors,k)
+
+ # set up first nodeinfo
+ inf = <nodeinfo*>stdlib.malloc(sizeof(nodeinfo)+self.m*sizeof(double))
+ inf.node = self.tree
+ for i in range(self.m):
+ inf.side_distances[i] = 0
+ t = x[i]-self.raw_maxes[i]
+ if t>inf.side_distances[i]:
+ inf.side_distances[i] = t
+ else:
+ t = self.raw_mins[i]-x[i]
+ if t>inf.side_distances[i]:
+ inf.side_distances[i] = t
+ if p!=1 and p!=infinity:
+ inf.side_distances[i]=inf.side_distances[i]**p
+
+ # compute first distance
+ min_distance = 0.
+ for i in range(self.m):
+ if p==infinity:
+ min_distance = dmax(min_distance,inf.side_distances[i])
+ else:
+ min_distance += inf.side_distances[i]
+
+ # fiddle approximation factor
+ if eps==0:
+ epsfac=1
+ elif p==infinity:
+ epsfac = 1/(1+eps)
+ else:
+ epsfac = 1/(1+eps)**p
+
+ # internally we represent all distances as distance**p
+ if p!=infinity and distance_upper_bound!=infinity:
+ distance_upper_bound = distance_upper_bound**p
+
+ while True:
+ if inf.node.split_dim==-1:
+ node = <leafnode*>inf.node
+
+ # brute-force
+ for i in range(node.start_idx,node.end_idx):
+ d = _distance_p(
+ self.raw_data+self.raw_indices[i]*self.m,
+ x,p,self.m,distance_upper_bound,period)
+
+ if d<distance_upper_bound:
+ # replace furthest neighbor
+ if neighbors.n==k:
+ heapremove(&neighbors)
+ neighbor.priority = -d
+ neighbor.contents.intdata = self.raw_indices[i]
+ heappush(&neighbors,neighbor)
+
+ # adjust upper bound for efficiency
+ if neighbors.n==k:
+ distance_upper_bound = -heappeek(&neighbors).priority
+ # done with this node, get another
+ stdlib.free(inf)
+ if q.n==0:
+ # no more nodes to visit
+ break
+ else:
+ it = heappop(&q)
+ inf = <nodeinfo*>it.contents.ptrdata
+ min_distance = it.priority
+ else:
+ inode = <innernode*>inf.node
+
+ # we don't push cells that are too far onto the queue at all,
+ # but since the distance_upper_bound decreases, we might get
+ # here even if the cell's too far
+ if min_distance>distance_upper_bound*epsfac:
+ # since this is the nearest cell, we're done, bail out
+ stdlib.free(inf)
+ # free all the nodes still on the heap
+ for i in range(q.n):
+ stdlib.free(q.heap[i].contents.ptrdata)
+ break
+
+ # set up children for searching
+ if x[inode.split_dim]<inode.split:
+ near = inode.less
+ far = inode.greater
+ else:
+ near = inode.greater
+ far = inode.less
+
+ # near child is at the same distance as the current node
+ # we're going here next, so no point pushing it on the queue
+ # no need to recompute the distance or the side_distances
+ inf.node = near
+
+ # far child is further by an amount depending only
+ # on the split value; compute its distance and side_distances
+ # and push it on the queue if it's near enough
+ inf2 = <nodeinfo*>stdlib.malloc(sizeof(nodeinfo)+self.m*sizeof(double))
+ it2.contents.ptrdata = <char*> inf2
+ inf2.node = far
+
+ # Periodicity added by S Skory
+ m_left = dmin( dabs(far.mins[inode.split_dim] - x[inode.split_dim]), \
+ period[inode.split_dim] - dabs(far.mins[inode.split_dim] - x[inode.split_dim]))
+ m_right = dmin( dabs(far.maxes[inode.split_dim] - x[inode.split_dim]), \
+ period[inode.split_dim] - dabs(far.maxes[inode.split_dim] - x[inode.split_dim]))
+ m = dmin(m_left,m_right)
+
+ # most side distances unchanged
+ for i in range(self.m):
+ inf2.side_distances[i] = inf.side_distances[i]
+
+ # one side distance changes
+ # we can adjust the minimum distance without recomputing
+ if p == infinity:
+ # we never use side_distances in the l_infinity case
+ # inf2.side_distances[inode.split_dim] = dabs(inode.split-x[inode.split_dim])
+ far_min_distance = dmax(min_distance, m)
+ elif p == 1:
+ inf2.side_distances[inode.split_dim] = m
+ far_min_distance = dmax(min_distance, m)
+ else:
+ inf2.side_distances[inode.split_dim] = m**p
+ #far_min_distance = min_distance - inf.side_distances[inode.split_dim] + inf2.side_distances[inode.split_dim]
+ far_min_distance = m**p
+
+ it2.priority = far_min_distance
+
+
+ # far child might be too far, if so, don't bother pushing it
+ if far_min_distance<=distance_upper_bound*epsfac:
+ heappush(&q,it2)
+ else:
+ stdlib.free(inf2)
+ # just in case
+ it2.contents.ptrdata = <char*> 0
+
+ # fill output arrays with sorted neighbors
+ for i in range(neighbors.n-1,-1,-1):
+ neighbor = heappop(&neighbors) # FIXME: neighbors may be realloced
+ result_indices[i] = neighbor.contents.intdata
+ if p==1 or p==infinity:
+ result_distances[i] = -neighbor.priority
+ else:
+ result_distances[i] = (-neighbor.priority) #**(1./p) S. Skory
+
+ heapdestroy(&q)
+ heapdestroy(&neighbors)
+
+ def query(cKDTree self, object x, int k=1, double eps=0, double p=2,
+ double distance_upper_bound=infinity, object period=None):
+ """query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf,
+ period=None)
+
+ Query the kd-tree for nearest neighbors.
+
+ Parameters
+ ----------
+ x : array_like, last dimension self.m
+ An array of points to query.
+ k : int
+ The number of nearest neighbors to return.
+ eps : non-negative float
+ Return approximate nearest neighbors; the k-th returned value
+ is guaranteed to be no further than (1 + `eps`) times the
+ distance to the real k-th nearest neighbor.
+ p : float, 1 <= p <= infinity
+ Which Minkowski p-norm to use.
+ 1 is the sum-of-absolute-values "Manhattan" distance.
+ 2 is the usual Euclidean distance.
+ infinity is the maximum-coordinate-difference distance.
+ distance_upper_bound : non-negative float
+ Return only neighbors within this distance. This is used to prune
+ tree searches, so if you are doing a series of nearest-neighbor
+ queries, it may help to supply the distance to the nearest neighbor
+ of the most recent point.
+
+ Returns
+ -------
+ d : ndarray of floats
+ The distances to the nearest neighbors.
+ If `x` has shape tuple+(self.m,), then `d` has shape tuple+(k,).
+ Missing neighbors are indicated with infinite distances.
+ i : ndarray of ints
+ The locations of the neighbors in self.data.
+ If `x` has shape tuple+(self.m,), then `i` has shape tuple+(k,).
+ Missing neighbors are indicated with self.n.
+
+ """
+ cdef np.ndarray[long, ndim=2] ii
+ cdef np.ndarray[double, ndim=2] dd
+ cdef np.ndarray[double, ndim=2] xx
+ cdef np.ndarray[double, ndim=1] cperiod
+ cdef int c
+ x = np.asarray(x).astype(np.double)
+ if period is None:
+ period = np.array([np.inf]*self.m)
+ else:
+ period = np.asarray(period).astype(np.double)
+ cperiod = np.ascontiguousarray(period)
+ if np.shape(x)[-1] != self.m:
+ raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x)))
+ if p<1:
+ raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+ if len(x.shape)==1:
+ single = True
+ x = x[np.newaxis,:]
+ else:
+ single = False
+ retshape = np.shape(x)[:-1]
+ n = np.prod(retshape)
+ xx = np.reshape(x,(n,self.m))
+ xx = np.ascontiguousarray(xx)
+ dd = np.empty((n,k),dtype=np.double)
+ dd.fill(infinity)
+ ii = np.empty((n,k),dtype=np.long)
+ ii.fill(self.n)
+ for c in range(n):
+ self.__query(
+ (<double*>dd.data)+c*k,
+ (<long*>ii.data)+c*k,
+ (<double*>xx.data)+c*self.m,
+ k,
+ eps,
+ p,
+ distance_upper_bound,
+ <double*>cperiod.data)
+ if single:
+ if k==1:
+ return dd[0,0], ii[0,0]
+ else:
+ return dd[0], ii[0]
+ else:
+ if k==1:
+ return np.reshape(dd[...,0],retshape), np.reshape(ii[...,0],retshape)
+ else:
+ return np.reshape(dd,retshape+(k,)), np.reshape(ii,retshape+(k,))
+
+ def chainHOP_get_dens(cKDTree self, object mass, int num_neighbors=65, \
+ int nMerge=6):
+ """ query the tree for the nearest neighbors, to get the density
+ of particles for chainHOP.
+
+ Parameters:
+ ===========
+
+ mass: An array-like list of the masses of the particles, in the same
+ order as the data that went into building the kd tree.
+
+ num_neighbors: Optional, the number of neighbors to search for and to
+ use in the density calculation. Default is 65, and is probably what
+ one should stick with.
+
+ nMerge: Currently unused; neighbor tags are no longer returned here.
+
+ Returns:
+ ========
+
+ dens: An array of the densities for each particle, in the same order
+ as the input data.
+
+ Neighbor tags are not returned by this method; they are gathered in
+ chunks by find_chunk_nearest_neighbors.
+
+ """
+
+ # We're no longer returning all the tags in this step.
+ # We do it chunked, in find_chunk_nearest_neighbors.
+ #cdef np.ndarray[long, ndim=2] tags
+ cdef np.ndarray[double, ndim=1] dens
+ cdef np.ndarray[double, ndim=1] query
+ cdef np.ndarray[long, ndim=1] tags_temp
+ cdef np.ndarray[double, ndim=1] dist_temp
+ cdef int i, pj, j
+ cdef double ih2, fNorm, r2, rs
+
+ #tags = np.empty((self.n, nMerge), dtype=np.long)
+ dens = np.empty(self.n, dtype=np.double)
+ query = np.empty(self.m, dtype=np.double)
+ tags_temp = np.empty(num_neighbors, dtype=np.long)
+ dist_temp = np.empty(num_neighbors, dtype=np.double)
+ # Need to start out with zeros before we start adding to it.
+ dens.fill(0.0)
+
+ mass = np.array(mass).astype(np.double)
+ mass = np.ascontiguousarray(mass)
+
+ for i in range(self.n):
+ query = self.data[i]
+ (dist_temp, tags_temp) = self.query(query, k=num_neighbors, period=[1.]*3) # NOTE: period is hard-coded to 1.0 in each of 3 dimensions
+
+ #calculate the density for this particle
+ ih2 = 4.0/np.max(dist_temp) # dist_temp holds distance**p (p=2 by default); __query does not take the root
+ fNorm = 0.5*np.sqrt(ih2)*ih2/np.pi
+ for j in range(num_neighbors):
+ pj = tags_temp[j]
+ r2 = dist_temp[j] * ih2
+ rs = 2.0 - np.sqrt(r2)
+ if (r2 < 1.0):
+ rs = (1.0 - 0.75*rs*r2)
+ else:
+ rs = 0.25*rs*rs*rs
+ rs = rs * fNorm
+ dens[i] = dens[i] + rs * mass[pj] # the contribution is added to both members of the pair
+ dens[pj] = dens[pj] + rs * mass[i]
+
+ # store nMerge nearest neighbors
+ #tags[i,:] = tags_temp[:nMerge]
+
+ #return (dens, tags)
+ return dens
+
+ def find_chunk_nearest_neighbors(cKDTree self, int start, int finish, \
+ int num_neighbors=65):
+ """ query the tree in chunks, between start and finish, recording the
+ nearest neighbors.
+
+ Parameters:
+ ===========
+
+ start: The starting point in the dataset for this search.
+
+ finish: The ending point in the dataset for this search.
+
+ num_neighbors: Optional, the number of neighbors to search for.
+ The default is 65.
+
+ Returns:
+ ========
+
+ chunk_tags: A two-dimensional array of the nearest neighbor tags for the
+ points in this search.
+
+ """
+
+ cdef np.ndarray[long, ndim=2] chunk_tags
+ cdef np.ndarray[double, ndim=1] query
+ cdef np.ndarray[long, ndim=1] tags_temp
+ cdef np.ndarray[double, ndim=1] dist_temp
+ cdef int i
+
+ chunk_tags = np.empty((finish-start, num_neighbors), dtype=np.long)
+ query = np.empty(self.m, dtype=np.double)
+ tags_temp = np.empty(num_neighbors, dtype=np.long)
+ dist_temp = np.empty(num_neighbors, dtype=np.double)
+
+ for i in range(finish-start):
+ query = self.data[i+start]
+ (dist_temp, tags_temp) = self.query(query, k=num_neighbors, period=[1.]*3)
+ chunk_tags[i,:] = tags_temp[:]
+
+ return chunk_tags
+
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/common.h
--- /dev/null
+++ b/yt/utilities/spatial/common.h
@@ -0,0 +1,70 @@
+/**
+ * common.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved into new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CLUSTER_COMMON_H
+#define _CLUSTER_COMMON_H
+
+#define CPY_MAX(_x, _y) (((_x) > (_y)) ? (_x) : (_y)) /* larger argument; arguments may be evaluated twice */
+#define CPY_MIN(_x, _y) (((_x) < (_y)) ? (_x) : (_y)) /* smaller argument; arguments may be evaluated twice */
+
+#define NCHOOSE2(_n) ((_n)*((_n)-1)/2) /* number of unordered pairs among _n items */
+
+#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
+#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
+ CPY_BITS_PER_CHAR))
+#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
+ ((CPY_BITS_PER_CHAR-1) - \
+ ((i) % CPY_BITS_PER_CHAR))) & 0x1)
+#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
+ ((0x1) << ((CPY_BITS_PER_CHAR-1) \
+ -((i) % CPY_BITS_PER_CHAR))))
+#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
+ ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
+ -((i) % CPY_BITS_PER_CHAR))))
+
+#ifndef CPY_CEIL_DIV
+#define CPY_CEIL_DIV(x, y) ((((double)(x))/(double)(y)) == \
+                            ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1)) /* x/y rounded up */
+#endif
+
+
+#ifdef CPY_DEBUG
+#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define CPY_DEBUG_MSG(...)
+#endif
+
+#endif
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/distance.py
--- /dev/null
+++ b/yt/utilities/spatial/distance.py
@@ -0,0 +1,2178 @@
+"""
+=====================================================
+Distance computations (:mod:`scipy.spatial.distance`)
+=====================================================
+
+.. sectionauthor:: Damian Eads
+
+Function Reference
+------------------
+
+Distance matrix computation from a collection of raw observation vectors
+stored in a rectangular array.
+
+.. autosummary::
+ :toctree: generated/
+
+ pdist -- pairwise distances between observation vectors.
+ cdist -- distances between two collections of observation vectors
+ squareform -- convert distance matrix to a condensed one and vice versa
+
+Predicates for checking the validity of distance matrices, both
+condensed and redundant. Also contained in this module are functions
+for computing the number of observations in a distance matrix.
+
+.. autosummary::
+ :toctree: generated/
+
+ is_valid_dm -- checks for a valid distance matrix
+ is_valid_y -- checks for a valid condensed distance matrix
+ num_obs_dm -- # of observations in a distance matrix
+ num_obs_y -- # of observations in a condensed distance matrix
+
+Distance functions between two vectors ``u`` and ``v``. Computing
+distances over a large collection of vectors is inefficient for these
+functions. Use ``pdist`` for this purpose.
+
+.. autosummary::
+ :toctree: generated/
+
+ braycurtis -- the Bray-Curtis distance.
+ canberra -- the Canberra distance.
+ chebyshev -- the Chebyshev distance.
+ cityblock -- the Manhattan distance.
+ correlation -- the Correlation distance.
+ cosine -- the Cosine distance.
+ dice -- the Dice dissimilarity (boolean).
+ euclidean -- the Euclidean distance.
+ hamming -- the Hamming distance (boolean).
+ jaccard -- the Jaccard distance (boolean).
+ kulsinski -- the Kulsinski distance (boolean).
+ mahalanobis -- the Mahalanobis distance.
+ matching -- the matching dissimilarity (boolean).
+ minkowski -- the Minkowski distance.
+ rogerstanimoto -- the Rogers-Tanimoto dissimilarity (boolean).
+ russellrao -- the Russell-Rao dissimilarity (boolean).
+ seuclidean -- the normalized Euclidean distance.
+ sokalmichener -- the Sokal-Michener dissimilarity (boolean).
+ sokalsneath -- the Sokal-Sneath dissimilarity (boolean).
+ sqeuclidean -- the squared Euclidean distance.
+ yule -- the Yule dissimilarity (boolean).
+
+
+References
+----------
+
+.. [Sta07] "Statistics toolbox." API Reference Documentation. The MathWorks.
+ http://www.mathworks.com/access/helpdesk/help/toolbox/stats/.
+ Accessed October 1, 2007.
+
+.. [Mti07] "Hierarchical clustering." API Reference Documentation.
+ The Wolfram Research, Inc.
+ http://reference.wolfram.com/mathematica/HierarchicalClustering/tutorial/HierarchicalClustering.html.
+ Accessed October 1, 2007.
+
+.. [Gow69] Gower, JC and Ross, GJS. "Minimum Spanning Trees and Single Linkage
+ Cluster Analysis." Applied Statistics. 18(1): pp. 54--64. 1969.
+
+.. [War63] Ward Jr, JH. "Hierarchical grouping to optimize an objective
+ function." Journal of the American Statistical Association. 58(301):
+ pp. 236--44. 1963.
+
+.. [Joh66] Johnson, SC. "Hierarchical clustering schemes." Psychometrika.
+ 32(2): pp. 241--54. 1966.
+
+.. [Sne62] Sneath, PH and Sokal, RR. "Numerical taxonomy." Nature. 193: pp.
+ 855--60. 1962.
+
+.. [Bat95] Batagelj, V. "Comparing resemblance measures." Journal of
+ Classification. 12: pp. 73--90. 1995.
+
+.. [Sok58] Sokal, RR and Michener, CD. "A statistical method for evaluating
+ systematic relationships." Scientific Bulletins. 38(22):
+ pp. 1409--38. 1958.
+
+.. [Ede79] Edelbrock, C. "Mixture model tests of hierarchical clustering
+ algorithms: the problem of classifying everybody." Multivariate
+ Behavioral Research. 14: pp. 367--84. 1979.
+
+.. [Jai88] Jain, A., and Dubes, R., "Algorithms for Clustering Data."
+ Prentice-Hall. Englewood Cliffs, NJ. 1988.
+
+.. [Fis36] Fisher, RA "The use of multiple measurements in taxonomic
+ problems." Annals of Eugenics, 7(2): 179-188. 1936
+
+
+Copyright Notice
+----------------
+
+Copyright (C) Damian Eads, 2007-2008. New BSD License.
+
+"""
+
+import warnings
+import numpy as np
+from numpy.linalg import norm
+
+import _distance_wrap
+
+
+def _copy_array_if_base_present(a):
+ """
+ Copies the array if its base points to a parent array.
+ """
+ if a.base is not None:
+ return a.copy()
+ elif np.issubsctype(a, np.float32):
+ return np.array(a, dtype=np.double)
+ else:
+ return a
+
+
+def _copy_arrays_if_base_present(T):
+ """
+ Accepts a tuple of arrays T. Copies the array T[i] if its base array
+ points to an actual array. Otherwise, the reference is just copied.
+ This is useful if the arrays are being passed to a C function that
+ does not do proper striding.
+ """
+ l = [_copy_array_if_base_present(a) for a in T]
+ return l
+
+
+def _convert_to_bool(X):
+ if X.dtype != np.bool:
+ X = np.bool_(X)
+ if not X.flags.contiguous:
+ X = X.copy()
+ return X
+
+
+def _convert_to_double(X):
+ if X.dtype != np.double:
+ X = np.double(X)
+ if not X.flags.contiguous:
+ X = X.copy()
+ return X
+
+
+def _validate_vector(u, dtype=None):
+ # XXX Is order='c' really necessary?
+ u = np.asarray(u, dtype=dtype, order='c').squeeze()
+ # Ensure values such as u=1 and u=[1] still return 1-D arrays.
+ u = np.atleast_1d(u)
+ if u.ndim > 1:
+ raise ValueError("Input vector should be 1-D.")
+ return u
+
+
+def minkowski(u, v, p):
+ r"""
+ Computes the Minkowski distance between two vectors ``u`` and ``v``,
+ defined as
+
+ .. math::
+
+ {||u-v||}_p = (\sum{|u_i - v_i|^p})^{1/p}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An n-dimensional vector.
+ v : ndarray
+ An n-dimensional vector.
+ p : int
+ The order of the norm of the difference :math:`{||u-v||}_p`.
+
+ Returns
+ -------
+ d : double
+ The Minkowski distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if p < 1:
+ raise ValueError("p must be at least 1")
+ dist = norm(u - v, ord=p)
+ return dist
+
+
+def wminkowski(u, v, p, w):
+ r"""
+ Computes the weighted Minkowski distance between two vectors ``u``
+ and ``v``, defined as
+
+ .. math::
+
+ \left(\sum{(|w_i (u_i - v_i)|^p)}\right)^{1/p}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+ p : int
+ The order of the norm of the difference :math:`{||u-v||}_p`.
+ w : ndarray
+ The weight vector.
+
+ Returns
+ -------
+ d : double
+ The weighted Minkowski distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ w = _validate_vector(w)
+ if p < 1:
+ raise ValueError("p must be at least 1")
+ dist = norm(w * (u - v), ord=p)
+ return dist
+
+
+def euclidean(u, v):
+ """
+ Computes the Euclidean distance between two n-vectors ``u`` and ``v``,
+ which is defined as
+
+ .. math::
+
+ {||u-v||}_2
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Euclidean distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = norm(u - v)
+ return dist
+
+
+def sqeuclidean(u, v):
+ """
+ Computes the squared Euclidean distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ {||u-v||}_2^2.
+
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The squared Euclidean distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = ((u - v) ** 2).sum()
+ return dist
+
+
+def cosine(u, v):
+ r"""
+ Computes the Cosine distance between two n-vectors u and v, which
+ is defined as
+
+ .. math::
+
+ 1 - \frac{uv^T}
+ {||u||_2 ||v||_2}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Cosine distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = 1.0 - np.dot(u, v) / (norm(u) * norm(v))
+ return dist
+
+
+def correlation(u, v):
+ r"""
+ Computes the correlation distance between two n-vectors ``u`` and
+ ``v``, which is defined as
+
+ .. math::
+
+ 1 - \frac{(u - \bar{u}){(v - \bar{v})}^T}
+ {{||(u - \bar{u})||}_2 {||(v - \bar{v})||}_2}
+
+ where :math:`\bar{u}` is the mean of the elements of ``u``,
+ :math:`\bar{v}` is the mean of the elements of ``v``, and ``n``
+ is the common dimensionality of ``u`` and ``v``.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The correlation distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ umu = u.mean()
+ vmu = v.mean()
+ um = u - umu
+ vm = v - vmu
+ dist = 1.0 - np.dot(um, vm) / (norm(um) * norm(vm))
+ return dist
+
+
+def hamming(u, v):
+ r"""
+ Computes the Hamming distance between two n-vectors ``u`` and
+ ``v``, which is simply the proportion of disagreeing components in
+ ``u`` and ``v``. If ``u`` and ``v`` are boolean vectors, the Hamming
+ distance is
+
+ .. math::
+
+ \frac{c_{01} + c_{10}}{n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Hamming distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ return (u != v).mean()
+
+
+def jaccard(u, v):
+ r"""
+ Computes the Jaccard-Needham dissimilarity between two boolean
+ n-vectors u and v, which is
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT}}
+ {c_{TT} + c_{FT} + c_{TF}}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Jaccard distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ dist = (np.double(np.bitwise_and((u != v),
+ np.bitwise_or(u != 0, v != 0)).sum())
+ / np.double(np.bitwise_or(u != 0, v != 0).sum()))
+ return dist
+
+
+def kulsinski(u, v):
+ r"""
+ Computes the Kulsinski dissimilarity between two boolean n-vectors
+ u and v, which is defined as
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT} - c_{TT} + n}
+ {c_{FT} + c_{TF} + n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Kulsinski distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ n = float(len(u))
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+
+ return (ntf + nft - ntt + n) / (ntf + nft + n)
+
+
+def seuclidean(u, v, V):
+ """
+ Returns the standardized Euclidean distance between two n-vectors
+ ``u`` and ``v``. ``V`` is an n-dimensional vector of component
+ variances. It is usually computed over a larger collection of
+ vectors.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+ V : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The standardized Euclidean distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ V = _validate_vector(V, dtype=np.float64)
+ if V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
+ raise TypeError('V must be a 1-D array of the same dimension '
+ 'as u and v.')
+ return np.sqrt(((u - v) ** 2 / V).sum())
+
+
+def cityblock(u, v):
+ """
+ Computes the Manhattan distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ \\sum_i {\\left| u_i - v_i \\right|}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The City Block distance between vectors ``u`` and ``v``.
+
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ return abs(u - v).sum()
+
+
+def mahalanobis(u, v, VI):
+ r"""
+ Computes the Mahalanobis distance between two n-vectors ``u`` and ``v``,
+ which is defined as
+
+ .. math::
+
+ \sqrt{(u-v)V^{-1}(u-v)^T}
+
+ where ``VI`` is the inverse covariance matrix :math:`V^{-1}`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+ VI : ndarray
+ The inverse of the covariance matrix.
+
+ Returns
+ -------
+ d : double
+ The Mahalanobis distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ VI = np.atleast_2d(VI)
+ delta = u - v
+ m = np.dot(np.dot(delta, VI), delta)
+ return np.sqrt(m)
+
+
+def chebyshev(u, v):
+ r"""
+ Computes the Chebyshev distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ \max_i {|u_i-v_i|}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Chebyshev distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ return max(abs(u - v))
+
+
+def braycurtis(u, v):
+ r"""
+ Computes the Bray-Curtis distance between two n-vectors ``u`` and
+ ``v``, which is defined as
+
+ .. math::
+
+ \sum{|u_i-v_i|} / \sum{|u_i+v_i|}.
+
+ The Bray-Curtis distance is in the range [0, 1] if all coordinates are
+ positive, and is undefined if the inputs are of length zero.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Bray-Curtis distance between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v, dtype=np.float64)
+ return abs(u - v).sum() / abs(u + v).sum()
+
+
+def canberra(u, v):
+ r"""
+ Computes the Canberra distance between two n-vectors u and v,
+ which is defined as
+
+ .. math::
+
+ \sum_i \frac{|u_i-v_i|}
+ {(|u_i|+|v_i|)}.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Canberra distance between vectors ``u`` and ``v``.
+
+ Notes
+ -----
+ When u[i] and v[i] are 0 for given i, then the fraction 0/0 = 0 is used in
+ the calculation.
+
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v, dtype=np.float64)
+ olderr = np.seterr(invalid='ignore')
+ try:
+ d = np.nansum(abs(u - v) / (abs(u) + abs(v)))
+ finally:
+ np.seterr(**olderr)
+ return d
+
+
+def _nbool_correspond_all(u, v):
+ if u.dtype != v.dtype:
+ raise TypeError("Arrays being compared must be of the same data type.")
+
+ if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+ not_u = 1.0 - u
+ not_v = 1.0 - v
+ nff = (not_u * not_v).sum()
+ nft = (not_u * v).sum()
+ ntf = (u * not_v).sum()
+ ntt = (u * v).sum()
+ elif u.dtype == np.bool:
+ not_u = ~u
+ not_v = ~v
+ nff = (not_u & not_v).sum()
+ nft = (not_u & v).sum()
+ ntf = (u & not_v).sum()
+ ntt = (u & v).sum()
+ else:
+ raise TypeError("Arrays being compared have unknown type.")
+
+ return (nff, nft, ntf, ntt)
+
+
+def _nbool_correspond_ft_tf(u, v):
+ if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
+ not_u = 1.0 - u
+ not_v = 1.0 - v
+ nft = (not_u * v).sum()
+ ntf = (u * not_v).sum()
+ else:
+ not_u = ~u
+ not_v = ~v
+ nft = (not_u & v).sum()
+ ntf = (u & not_v).sum()
+ return (nft, ntf)
+
+
+def yule(u, v):
+ r"""
+ Computes the Yule dissimilarity between two boolean n-vectors u and v,
+ which is defined as
+
+
+ .. math::
+
+ \frac{R}{c_{TT} c_{FF} + \frac{R}{2}}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n` and :math:`R = 2.0 * c_{TF} * c_{FT}`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Yule dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+ return float(2.0 * ntf * nft) / float(ntt * nff + ntf * nft)
+
+
+def matching(u, v):
+ r"""
+ Computes the Matching dissimilarity between two boolean n-vectors
+ u and v, which is defined as
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT}}{n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Matching dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(nft + ntf) / float(len(u))
+
+
+def dice(u, v):
+ r"""
+ Computes the Dice dissimilarity between two boolean n-vectors
+ ``u`` and ``v``, which is
+
+ .. math::
+
+ \frac{c_{TF} + c_{FT}}
+ {2c_{TT} + c_{FT} + c_{TF}}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Dice dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(ntf + nft) / float(2.0 * ntt + ntf + nft)
+
+
+def rogerstanimoto(u, v):
+ r"""
+ Computes the Rogers-Tanimoto dissimilarity between two boolean
+ n-vectors ``u`` and ``v``, which is defined as
+
+ .. math::
+ \frac{R}
+ {c_{TT} + c_{FF} + R}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n` and :math:`R = 2(c_{TF} + c_{FT})`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Rogers-Tanimoto dissimilarity between vectors
+ `u` and `v`.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ (nff, nft, ntf, ntt) = _nbool_correspond_all(u, v)
+ return float(2.0 * (ntf + nft)) / float(ntt + nff + (2.0 * (ntf + nft)))
+
+
+def russellrao(u, v):
+ r"""
+ Computes the Russell-Rao dissimilarity between two boolean n-vectors
+ ``u`` and ``v``, which is defined as
+
+ .. math::
+
+ \frac{n - c_{TT}}
+ {n}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Russell-Rao dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ return float(len(u) - ntt) / float(len(u))
+
+
+def sokalmichener(u, v):
+ r"""
+ Computes the Sokal-Michener dissimilarity between two boolean vectors
+ ``u`` and ``v``, which is defined as
+
+ .. math::
+
+ \frac{R}
+ {S + R}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n`, :math:`R = 2 * (c_{TF} + c_{FT})` and
+ :math:`S = c_{FF} + c_{TT}`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Sokal-Michener dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ nff = (~u & ~v).sum()
+ else:
+ ntt = (u * v).sum()
+ nff = ((1.0 - u) * (1.0 - v)).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ return float(2.0 * (ntf + nft)) / float(ntt + nff + 2.0 * (ntf + nft))
+
+
+def sokalsneath(u, v):
+ r"""
+ Computes the Sokal-Sneath dissimilarity between two boolean vectors
+ ``u`` and ``v``,
+
+ .. math::
+
+ \frac{R}
+ {c_{TT} + R}
+
+ where :math:`c_{ij}` is the number of occurrences of
+ :math:`\mathtt{u[k]} = i` and :math:`\mathtt{v[k]} = j` for
+ :math:`k < n` and :math:`R = 2(c_{TF} + c_{FT})`.
+
+ Parameters
+ ----------
+ u : ndarray
+ An :math:`n`-dimensional vector.
+ v : ndarray
+ An :math:`n`-dimensional vector.
+
+ Returns
+ -------
+ d : double
+ The Sokal-Sneath dissimilarity between vectors ``u`` and ``v``.
+ """
+ u = _validate_vector(u)
+ v = _validate_vector(v)
+ if u.dtype == np.bool:
+ ntt = (u & v).sum()
+ else:
+ ntt = (u * v).sum()
+ (nft, ntf) = _nbool_correspond_ft_tf(u, v)
+ denom = ntt + 2.0 * (ntf + nft)
+ if denom == 0:
+ raise ValueError('Sokal-Sneath dissimilarity is not defined for '
+ 'vectors that are entirely false.')
+ return float(2.0 * (ntf + nft)) / denom
+
+
+def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
+ r"""
+ Computes the pairwise distances between m original observations in
+ n-dimensional space. Returns a condensed distance matrix Y. For
+ each :math:`i` and :math:`j` (where :math:`i<j<m`), the
+ metric ``dist(u=X[i], v=X[j])`` is computed and stored in the
+ :math:`ij`th entry.
+
+ See ``squareform`` for information on how to calculate the index of
+ this entry or to convert the condensed distance matrix to a
+ redundant square matrix.
+
+ The following are common calling conventions.
+
+ 1. ``Y = pdist(X, 'euclidean')``
+
+ Computes the distance between m points using Euclidean distance
+ (2-norm) as the distance metric between the points. The points
+ are arranged as m n-dimensional row vectors in the matrix X.
+
+ 2. ``Y = pdist(X, 'minkowski', p)``
+
+ Computes the distances using the Minkowski distance
+ :math:`||u-v||_p` (p-norm) where :math:`p \geq 1`.
+
+ 3. ``Y = pdist(X, 'cityblock')``
+
+ Computes the city block or Manhattan distance between the
+ points.
+
+ 4. ``Y = pdist(X, 'seuclidean', V=None)``
+
+ Computes the standardized Euclidean distance. The standardized
+ Euclidean distance between two n-vectors ``u`` and ``v`` is
+
+ .. math::
+
+ \sqrt{\sum {(u_i-v_i)^2 / V[x_i]}}.
+
+
+ V is the variance vector; V[i] is the variance computed over all
+ the i'th components of the points. If not passed, it is
+ automatically computed.
+
+ 5. ``Y = pdist(X, 'sqeuclidean')``
+
+ Computes the squared Euclidean distance :math:`||u-v||_2^2` between
+ the vectors.
+
+ 6. ``Y = pdist(X, 'cosine')``
+
+ Computes the cosine distance between vectors u and v,
+
+ .. math::
+
+ 1 - \frac{uv^T}
+ {{|u|}_2 {|v|}_2}
+
+ where |*|_2 is the 2 norm of its argument *.
+
+ 7. ``Y = pdist(X, 'correlation')``
+
+ Computes the correlation distance between vectors u and v. This is
+
+ .. math::
+
+ 1 - \frac{(u - \bar{u})(v - \bar{v})^T}
+ {{|(u - \bar{u})|}_2 {|(v - \bar{v})|}_2}
+
+ where :math:`\bar{v}` is the mean of the elements of vector v.
+
+ 8. ``Y = pdist(X, 'hamming')``
+
+ Computes the normalized Hamming distance, or the proportion of
+ those vector elements between two n-vectors ``u`` and ``v``
+ which disagree. To save memory, the matrix ``X`` can be of type
+ boolean.
+
+ 9. ``Y = pdist(X, 'jaccard')``
+
+ Computes the Jaccard distance between the points. Given two
+ vectors, ``u`` and ``v``, the Jaccard distance is the
+ proportion of those elements ``u[i]`` and ``v[i]`` that
+ disagree where at least one of them is non-zero.
+
+ 10. ``Y = pdist(X, 'chebyshev')``
+
+ Computes the Chebyshev distance between the points. The
+ Chebyshev distance between two n-vectors ``u`` and ``v`` is the
+ maximum norm-1 distance between their respective elements. More
+ precisely, the distance is given by
+
+ .. math::
+
+ d(u,v) = \max_i {|u_i-v_i|}.
+
+ 11. ``Y = pdist(X, 'canberra')``
+
+ Computes the Canberra distance between the points. The
+ Canberra distance between two points ``u`` and ``v`` is
+
+ .. math::
+
+ d(u,v) = \sum_i \frac{|u_i-v_i|}
+ {(|u_i|+|v_i|)}
+
+
+ 12. ``Y = pdist(X, 'braycurtis')``
+
+ Computes the Bray-Curtis distance between the points. The
+ Bray-Curtis distance between two points ``u`` and ``v`` is
+
+
+ .. math::
+
+ d(u,v) = \frac{\sum_i {|u_i-v_i|}}
+ {\sum_i {|u_i+v_i|}}
+
+ 13. ``Y = pdist(X, 'mahalanobis', VI=None)``
+
+ Computes the Mahalanobis distance between the points. The
+ Mahalanobis distance between two points ``u`` and ``v`` is
+ :math:`\sqrt{(u-v)(1/V)(u-v)^T}` where :math:`(1/V)` (the ``VI``
+ variable) is the inverse covariance. If ``VI`` is not None,
+ ``VI`` will be used as the inverse covariance matrix.
+
+ 14. ``Y = pdist(X, 'yule')``
+
+ Computes the Yule distance between each pair of boolean
+ vectors. (see yule function documentation)
+
+ 15. ``Y = pdist(X, 'matching')``
+
+ Computes the matching distance between each pair of boolean
+ vectors. (see matching function documentation)
+
+ 16. ``Y = pdist(X, 'dice')``
+
+ Computes the Dice distance between each pair of boolean
+ vectors. (see dice function documentation)
+
+ 17. ``Y = pdist(X, 'kulsinski')``
+
+ Computes the Kulsinski distance between each pair of
+ boolean vectors. (see kulsinski function documentation)
+
+ 18. ``Y = pdist(X, 'rogerstanimoto')``
+
+ Computes the Rogers-Tanimoto distance between each pair of
+ boolean vectors. (see rogerstanimoto function documentation)
+
+ 19. ``Y = pdist(X, 'russellrao')``
+
+ Computes the Russell-Rao distance between each pair of
+ boolean vectors. (see russellrao function documentation)
+
+ 20. ``Y = pdist(X, 'sokalmichener')``
+
+ Computes the Sokal-Michener distance between each pair of
+ boolean vectors. (see sokalmichener function documentation)
+
+ 21. ``Y = pdist(X, 'sokalsneath')``
+
+ Computes the Sokal-Sneath distance between each pair of
+ boolean vectors. (see sokalsneath function documentation)
+
+ 22. ``Y = pdist(X, 'wminkowski')``
+
+ Computes the weighted Minkowski distance between each pair of
+ vectors. (see wminkowski function documentation)
+
+ 23. ``Y = pdist(X, f)``
+
+ Computes the distance between all pairs of vectors in X
+ using the user supplied 2-arity function f. For example,
+ Euclidean distance between the vectors could be computed
+ as follows::
+
+ dm = pdist(X, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
+
+ Note that you should avoid passing a reference to one of
+ the distance functions defined in this library. For example,::
+
+ dm = pdist(X, sokalsneath)
+
+ would calculate the pair-wise distances between the vectors in
+ X using the Python function sokalsneath. This would result in
+ sokalsneath being called :math:`{m \choose 2}` times, which
+ is inefficient. Instead, the optimized C version is more
+ efficient, and we call it using the following syntax.::
+
+ dm = pdist(X, 'sokalsneath')
+
+ Parameters
+ ----------
+ X : ndarray
+ An m by n array of m original observations in an
+ n-dimensional space.
+ metric : string or function
+ The distance metric to use. The distance function can
+ be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
+ 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
+ 'jaccard', 'kulsinski', 'mahalanobis', 'matching',
+ 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+ 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski',
+ 'yule'.
+ w : ndarray
+ The weight vector (for weighted Minkowski).
+ p : double
+ The p-norm to apply (for Minkowski, weighted and unweighted)
+ V : ndarray
+ The variance vector (for standardized Euclidean).
+ VI : ndarray
+ The inverse of the covariance matrix (for Mahalanobis).
+
+ Returns
+ -------
+ Y : ndarray
+ A condensed distance matrix.
+
+ See Also
+ --------
+ squareform : converts between condensed distance matrices and
+ square distance matrices.
+ """
+
+# 24. Y = pdist(X, 'test_Y')
+#
+# Computes the distance between all pairs of vectors in X
+# using the distance metric Y but with a more succinct,
+# verifiable, but less efficient implementation.
+
+ X = np.asarray(X, order='c')
+
+ # The C code doesn't do striding.
+ [X] = _copy_arrays_if_base_present([_convert_to_double(X)])
+
+ s = X.shape
+ if len(s) != 2:
+ raise ValueError('A 2-dimensional array must be passed.')
+
+ m, n = s
+ dm = np.zeros((m * (m - 1) / 2,), dtype=np.double)
+
+ wmink_names = ['wminkowski', 'wmi', 'wm', 'wpnorm']
+ if w is None and (metric == wminkowski or metric in wmink_names):
+ raise ValueError('weighted minkowski requires a weight '
+ 'vector `w` to be given.')
+
+ if callable(metric):
+ if metric == minkowski:
+ def dfun(u, v):
+ return minkowski(u, v, p)
+ elif metric == wminkowski:
+ def dfun(u, v):
+ return wminkowski(u, v, p, w)
+ elif metric == seuclidean:
+ def dfun(u, v):
+ return seuclidean(u, v, V)
+ elif metric == mahalanobis:
+ def dfun(u, v):
+ return mahalanobis(u, v, V)
+ else:
+ dfun = metric
+
+ k = 0
+ for i in xrange(0, m - 1):
+ for j in xrange(i + 1, m):
+ dm[k] = dfun(X[i], X[j])
+ k = k + 1
+
+ elif isinstance(metric, basestring):
+ mstr = metric.lower()
+
+ #if X.dtype != np.double and \
+ # (mstr != 'hamming' and mstr != 'jaccard'):
+ # TypeError('A double array must be passed.')
+ if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
+ _distance_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['sqeuclidean', 'sqe', 'sqeuclid']):
+ _distance_wrap.pdist_euclidean_wrap(_convert_to_double(X), dm)
+ dm = dm ** 2.0
+ elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
+ _distance_wrap.pdist_city_block_wrap(X, dm)
+ elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
+ if X.dtype == np.bool:
+ _distance_wrap.pdist_hamming_bool_wrap(_convert_to_bool(X), dm)
+ else:
+ _distance_wrap.pdist_hamming_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
+ if X.dtype == np.bool:
+ _distance_wrap.pdist_jaccard_bool_wrap(_convert_to_bool(X), dm)
+ else:
+ _distance_wrap.pdist_jaccard_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
+ _distance_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
+ elif mstr in set(['minkowski', 'mi', 'm']):
+ _distance_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
+ elif mstr in wmink_names:
+ _distance_wrap.pdist_weighted_minkowski_wrap(_convert_to_double(X),
+ dm, p, np.asarray(w))
+ elif mstr in set(['seuclidean', 'se', 's']):
+ if V is not None:
+ V = np.asarray(V, order='c')
+ if type(V) != np.ndarray:
+ raise TypeError('Variance vector V must be a numpy array')
+ if V.dtype != np.double:
+ raise TypeError('Variance vector V must contain doubles.')
+ if len(V.shape) != 1:
+ raise ValueError('Variance vector V must '
+ 'be one-dimensional.')
+ if V.shape[0] != n:
+ raise ValueError('Variance vector V must be of the same '
+ 'dimension as the vectors on which the distances '
+ 'are computed.')
+ # The C code doesn't do striding.
+ [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
+ else:
+ VV = np.var(X, axis=0, ddof=1)
+ _distance_wrap.pdist_seuclidean_wrap(_convert_to_double(X), VV, dm)
+ # Need to test whether vectorized cosine works better.
+ # Find out: Is there a dot subtraction operator so I can
+ # subtract matrices in a similar way to multiplying them?
+ # Need to get rid of as much unnecessary C code as possible.
+ elif mstr in set(['cosine', 'cos']):
+ norms = np.sqrt(np.sum(X * X, axis=1))
+ _distance_wrap.pdist_cosine_wrap(_convert_to_double(X), dm, norms)
+ elif mstr in set(['old_cosine', 'old_cos']):
+ norms = np.sqrt(np.sum(X * X, axis=1))
+ nV = norms.reshape(m, 1)
+ # The numerator u * v
+ nm = np.dot(X, X.T)
+ # The denom. ||u||*||v||
+ de = np.dot(nV, nV.T)
+ dm = 1.0 - (nm / de)
+ dm[xrange(0, m), xrange(0, m)] = 0.0
+ dm = squareform(dm)
+ elif mstr in set(['correlation', 'co']):
+ X2 = X - X.mean(1)[:, np.newaxis]
+ #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
+ norms = np.sqrt(np.sum(X2 * X2, axis=1))
+ _distance_wrap.pdist_cosine_wrap(_convert_to_double(X2),
+ _convert_to_double(dm),
+ _convert_to_double(norms))
+ elif mstr in set(['mahalanobis', 'mahal', 'mah']):
+ if VI is not None:
+ VI = _convert_to_double(np.asarray(VI, order='c'))
+ if type(VI) != np.ndarray:
+ raise TypeError('VI must be a numpy array.')
+ if VI.dtype != np.double:
+ raise TypeError('The array must contain 64-bit floats.')
+ [VI] = _copy_arrays_if_base_present([VI])
+ else:
+ V = np.cov(X.T)
+ VI = _convert_to_double(np.linalg.inv(V).T.copy())
+ # (u-v)V^(-1)(u-v)^T
+ _distance_wrap.pdist_mahalanobis_wrap(_convert_to_double(X),
+ VI, dm)
+ elif mstr == 'canberra':
+ _distance_wrap.pdist_canberra_wrap(_convert_to_double(X), dm)
+ elif mstr == 'braycurtis':
+ _distance_wrap.pdist_bray_curtis_wrap(_convert_to_double(X), dm)
+ elif mstr == 'yule':
+ _distance_wrap.pdist_yule_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'matching':
+ _distance_wrap.pdist_matching_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'kulsinski':
+ _distance_wrap.pdist_kulsinski_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'dice':
+ _distance_wrap.pdist_dice_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'rogerstanimoto':
+ _distance_wrap.pdist_rogerstanimoto_bool_wrap(_convert_to_bool(X),
+ dm)
+ elif mstr == 'russellrao':
+ _distance_wrap.pdist_russellrao_bool_wrap(_convert_to_bool(X), dm)
+ elif mstr == 'sokalmichener':
+ _distance_wrap.pdist_sokalmichener_bool_wrap(_convert_to_bool(X),
+ dm)
+ elif mstr == 'sokalsneath':
+ _distance_wrap.pdist_sokalsneath_bool_wrap(_convert_to_bool(X), dm)
+ elif metric == 'test_euclidean':
+ dm = pdist(X, euclidean)
+ elif metric == 'test_sqeuclidean':
+ if V is None:
+ V = np.var(X, axis=0, ddof=1)
+ else:
+ V = np.asarray(V, order='c')
+ dm = pdist(X, lambda u, v: seuclidean(u, v, V))
+ elif metric == 'test_braycurtis':
+ dm = pdist(X, braycurtis)
+ elif metric == 'test_mahalanobis':
+ if VI is None:
+ V = np.cov(X.T)
+ VI = np.linalg.inv(V)
+ else:
+ VI = np.asarray(VI, order='c')
+ [VI] = _copy_arrays_if_base_present([VI])
+ # (u-v)V^(-1)(u-v)^T
+ dm = pdist(X, (lambda u, v: mahalanobis(u, v, VI)))
+ elif metric == 'test_canberra':
+ dm = pdist(X, canberra)
+ elif metric == 'test_cityblock':
+ dm = pdist(X, cityblock)
+ elif metric == 'test_minkowski':
+ dm = pdist(X, minkowski, p=p)
+ elif metric == 'test_wminkowski':
+ dm = pdist(X, wminkowski, p=p, w=w)
+ elif metric == 'test_cosine':
+ dm = pdist(X, cosine)
+ elif metric == 'test_correlation':
+ dm = pdist(X, correlation)
+ elif metric == 'test_hamming':
+ dm = pdist(X, hamming)
+ elif metric == 'test_jaccard':
+ dm = pdist(X, jaccard)
+ elif metric == 'test_chebyshev' or metric == 'test_chebychev':
+ dm = pdist(X, chebyshev)
+ elif metric == 'test_yule':
+ dm = pdist(X, yule)
+ elif metric == 'test_matching':
+ dm = pdist(X, matching)
+ elif metric == 'test_dice':
+ dm = pdist(X, dice)
+ elif metric == 'test_kulsinski':
+ dm = pdist(X, kulsinski)
+ elif metric == 'test_rogerstanimoto':
+ dm = pdist(X, rogerstanimoto)
+ elif metric == 'test_russellrao':
+ dm = pdist(X, russellrao)
+ elif metric == 'test_sokalsneath':
+ dm = pdist(X, sokalsneath)
+ elif metric == 'test_sokalmichener':
+ dm = pdist(X, sokalmichener)
+ else:
+ raise ValueError('Unknown Distance Metric: %s' % mstr)
+ else:
+ raise TypeError('2nd argument metric must be a string identifier '
+ 'or a function.')
+ return dm
+
+
+def squareform(X, force="no", checks=True):
+ r"""
+ Converts a vector-form distance vector to a square-form distance
+ matrix, and vice-versa.
+
+ Parameters
+ ----------
+ X : ndarray
+ Either a condensed or redundant distance matrix.
+
+ Returns
+ -------
+ Y : ndarray
+ If a condensed distance matrix is passed, a redundant
+ one is returned, or if a redundant one is passed, a
+ condensed distance matrix is returned.
+
+ force : string
+ As with MATLAB(TM), if force is equal to 'tovector' or
+ 'tomatrix', the input will be treated as a distance matrix
+ or distance vector respectively.
+
+ checks : bool
+ If ``checks`` is set to ``False``, no checks will be made
+ for matrix symmetry nor zero diagonals. This is useful if
+ it is known that ``X - X.T`` is small and ``diag(X)`` is
+ close to zero. These values are ignored any way so they do
+ not disrupt the squareform transformation.
+
+
+ Calling Conventions
+ -------------------
+
+ 1. v = squareform(X)
+
+ Given a square d by d symmetric distance matrix ``X``,
+ ``v=squareform(X)`` returns a :math:`d*(d-1)/2` (or
+ :math:`{n \choose 2}`) sized vector v.
+
+ v[{n \choose 2}-{n-i \choose 2} + (j-i-1)] is the distance
+ between points i and j. If X is non-square or asymmetric, an error
+ is returned.
+
+ X = squareform(v)
+
+ Given a d*(d-1)/2 sized v for some integer d>=2 encoding distances
+ as described, X=squareform(v) returns a d by d distance matrix X. The
+ X[i, j] and X[j, i] values are set to
+ v[{n \choose 2}-{n-i \choose 2} + (j-i-1)] and all
+ diagonal elements are zero.
+
+ """
+
+ X = _convert_to_double(np.asarray(X, order='c'))
+
+ if not np.issubsctype(X, np.double):
+ raise TypeError('A double array must be passed.')
+
+ s = X.shape
+
+ if force.lower() == 'tomatrix':
+ if len(s) != 1:
+ raise ValueError("Forcing 'tomatrix' but input X is not a "
+ "distance vector.")
+ elif force.lower() == 'tovector':
+ if len(s) != 2:
+ raise ValueError("Forcing 'tovector' but input X is not a "
+ "distance matrix.")
+
+ # X = squareform(v)
+ if len(s) == 1:
+ if X.shape[0] == 0:
+ return np.zeros((1, 1), dtype=np.double)
+
+ # Grab the closest value to the square root of the number
+ # of elements times 2 to see if the number of elements
+ # is indeed a binomial coefficient.
+ d = int(np.ceil(np.sqrt(X.shape[0] * 2)))
+
+ # Check that v is of valid dimensions.
+ if d * (d - 1) / 2 != int(s[0]):
+ raise ValueError('Incompatible vector size. It must be a binomial '
+ 'coefficient n choose 2 for some integer n >= 2.')
+
+ # Allocate memory for the distance matrix.
+ M = np.zeros((d, d), dtype=np.double)
+
+ # Since the C code does not support striding using strides.
+ # The dimensions are used instead.
+ [X] = _copy_arrays_if_base_present([X])
+
+ # Fill in the values of the distance matrix.
+ _distance_wrap.to_squareform_from_vector_wrap(M, X)
+
+ # Return the distance matrix.
+ M = M + M.transpose()
+ return M
+ elif len(s) == 2:
+ if s[0] != s[1]:
+ raise ValueError('The matrix argument must be square.')
+ if checks:
+ is_valid_dm(X, throw=True, name='X')
+
+ # One-side of the dimensions is set here.
+ d = s[0]
+
+ if d <= 1:
+ return np.array([], dtype=np.double)
+
+ # Create a vector.
+ v = np.zeros(((d * (d - 1) / 2),), dtype=np.double)
+
+ # Since the C code does not support striding using strides.
+ # The dimensions are used instead.
+ [X] = _copy_arrays_if_base_present([X])
+
+ # Convert the vector to squareform.
+ _distance_wrap.to_vector_from_squareform_wrap(X, v)
+ return v
+ else:
+ raise ValueError(('The first argument must be one or two dimensional '
+ 'array. A %d-dimensional array is not '
+ 'permitted') % len(s))
+
+
+def is_valid_dm(D, tol=0.0, throw=False, name="D", warning=False):
+    """
+    Returns True if the variable D passed is a valid distance matrix.
+    Distance matrices must be 2-dimensional numpy arrays containing
+    doubles. They must have a zero-diagonal, and they must be symmetric.
+
+    Parameters
+    ----------
+    D : ndarray
+        The candidate object to test for validity.
+    tol : double
+        Tolerance for the checks. When non-zero, ``D[i, j]`` and
+        ``D[j, i]`` may differ by at most ``tol`` and diagonal entries
+        may have magnitude at most ``tol``; when zero, the matrix must
+        be exactly symmetric with an exactly zero diagonal.
+    throw : bool
+        If True, the exception describing the first failed check is
+        re-raised (TypeError or ValueError) instead of returning False.
+    name : string
+        The name of the variable being checked. This is useful if
+        throw is set to ``True`` so the offending variable can be
+        identified in the exception message when an exception is
+        thrown. A false value selects the anonymous messages.
+    warning : bool
+        If True (and ``throw`` is False), the failure message is
+        issued through ``warnings.warn``.
+
+    Returns
+    -------
+    Returns ``True`` if the variable ``D`` passed is a valid
+    distance matrix.  Small numerical differences in ``D`` and
+    ``D.T`` and non-zeroness of the diagonal are ignored if they are
+    within the tolerance specified by ``tol``.
+    """
+    D = np.asarray(D, order='c')
+    valid = True
+    try:
+        # Each failed check raises; the handler below decides what to do.
+        if D.dtype != np.double:
+            if name:
+                raise TypeError(('Distance matrix \'%s\' must contain doubles '
+                                 '(double).') % name)
+            else:
+                raise TypeError('Distance matrix must contain doubles '
+                                '(double).')
+        if len(D.shape) != 2:
+            if name:
+                raise ValueError(('Distance matrix \'%s\' must have shape=2 '
+                                  '(i.e. be two-dimensional).') % name)
+            else:
+                raise ValueError('Distance matrix must have shape=2 (i.e. '
+                                 'be two-dimensional).')
+        if tol == 0.0:
+            if not (D == D.T).all():
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' must be '
+                                      'symmetric.') % name)
+                else:
+                    raise ValueError('Distance matrix must be symmetric.')
+            if not (D.diagonal() == 0).all():
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' diagonal must '
+                                      'be zero.') % name)
+                else:
+                    raise ValueError('Distance matrix diagonal must be zero.')
+        else:
+            if not (D - D.T <= tol).all():  # covers both signs: D - D.T is antisymmetric
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' must be '
+                                      'symmetric within tolerance %5.5f.')
+                                     % (name, tol))
+                else:
+                    raise ValueError('Distance matrix must be symmetric within'
+                                     ' tolerance %5.5f.' % tol)
+            if not (np.abs(D.diagonal()) <= tol).all():  # abs: reject large negative entries too
+                if name:
+                    raise ValueError(('Distance matrix \'%s\' diagonal must be'
+                                      ' close to zero within tolerance %5.5f.')
+                                     % (name, tol))
+                else:
+                    raise ValueError(('Distance matrix diagonal must be'
+                                      ' close to zero within tolerance %5.5f.')
+                                     % tol)
+    except Exception as e:
+        if throw:
+            raise
+        if warning:
+            warnings.warn(str(e))
+        valid = False
+    return valid
+
+
+def is_valid_y(y, warning=False, throw=False, name=None):
+    r"""
+    Returns ``True`` if the variable ``y`` passed is a valid condensed
+    distance matrix. Condensed distance matrices must be 1-dimensional
+    numpy arrays containing doubles. Their length must be a binomial
+    coefficient :math:`{n \choose 2}` for some positive integer n.
+
+
+    Parameters
+    ----------
+    y : ndarray
+        The condensed distance matrix.
+    warning : bool, optional
+        Invokes a warning if the variable passed is not a valid
+        condensed distance matrix. The warning message explains why
+        the distance matrix is not valid. 'name' is used when
+        referencing the offending variable.
+    throw : bool, optional
+        Throws an exception if the variable passed is not a valid
+        condensed distance matrix.
+    name : string, optional
+        Used when referencing the offending variable in the
+        warning or exception message.
+
+    """
+    y = np.asarray(y, order='c')
+    valid = True
+    try:
+        if type(y) != np.ndarray:
+            if name:
+                raise TypeError(('\'%s\' passed as a condensed distance '
+                                 'matrix is not a numpy array.') % name)
+            else:
+                raise TypeError('Variable is not a numpy array.')
+        if y.dtype != np.double:
+            if name:
+                raise TypeError(('Condensed distance matrix \'%s\' must '
+                                 'contain doubles (double).') % name)
+            else:
+                raise TypeError('Condensed distance matrix must contain '
+                                'doubles (double).')
+        if len(y.shape) != 1:
+            if name:
+                raise ValueError(('Condensed distance matrix \'%s\' must '
+                                  'have shape=1 (i.e. be one-dimensional).')
+                                 % name)
+            else:
+                raise ValueError('Condensed distance matrix must have shape=1 '
+                                 '(i.e. be one-dimensional).')
+        n = y.shape[0]
+        d = int(np.ceil(np.sqrt(n * 2)))  # candidate number of observations
+        if (d * (d - 1) / 2) != n:  # n must equal (d choose 2)
+            if name:
+                raise ValueError(('Length n of condensed distance matrix '
+                                  '\'%s\' must be a binomial coefficient, i.e.'
+                                  'there must be a k such that '
+                                  '(k \choose 2)=n)!') % name)
+            else:
+                raise ValueError('Length n of condensed distance matrix must '
+                                 'be a binomial coefficient, i.e. there must '
+                                 'be a k such that (k \choose 2)=n)!')
+    except Exception, e:
+        if throw:
+            raise  # propagate the first failed check unchanged
+        if warning:
+            warnings.warn(str(e))
+        valid = False
+    return valid
+
+
+def num_obs_dm(d):
+    """
+    Count the original observations behind a square, redundant
+    distance matrix ``d``.
+
+    Parameters
+    ----------
+    d : ndarray
+        The target distance matrix; ``is_valid_dm`` raises if it is invalid.
+
+    Returns
+    -------
+    numobs : int
+        The number of rows of the validated matrix.
+    """
+    dm = np.asarray(d, order='c')
+    is_valid_dm(dm, tol=np.inf, throw=True, name='d')
+    return dm.shape[0]
+
+
+def num_obs_y(Y):
+    """
+    Count the original observations behind a condensed distance
+    matrix ``Y``.
+
+    Parameters
+    ----------
+    Y : ndarray
+        The condensed distance matrix; ``is_valid_y`` raises if it is
+        invalid.
+
+    Returns
+    -------
+    n : int
+        The number of observations ``n`` for which the length of ``Y``
+        equals ``n`` choose 2.
+    """
+    Y = np.asarray(Y, order='c')
+    is_valid_y(Y, throw=True, name='Y')
+    n_entries = Y.shape[0]
+    if n_entries == 0:
+        raise ValueError("The number of observations cannot be determined on "
+                         "an empty distance matrix.")
+    n_obs = int(np.ceil(np.sqrt(2 * n_entries)))
+    if n_entries != (n_obs * (n_obs - 1) / 2):
+        raise ValueError("Invalid condensed distance matrix passed. Must be "
+                         "some k where k=(n choose 2) for some n >= 2.")
+    return n_obs
+
+
+def cdist(XA, XB, metric='euclidean', p=2, V=None, VI=None, w=None):
+    r"""
+    Computes distance between each pair of observation vectors in the
+    Cartesian product of two collections of vectors. ``XA`` is a
+    :math:`m_A` by :math:`n` array while ``XB`` is a :math:`m_B` by
+    :math:`n` array. A :math:`m_A` by :math:`m_B` array is
+    returned. An exception is thrown if ``XA`` and ``XB`` do not have
+    the same number of columns.
+
+    A rectangular distance matrix ``Y`` is returned. For each :math:`i`
+    and :math:`j`, the metric ``dist(u=XA[i], v=XB[j])`` is computed
+    and stored in the :math:`ij` th entry.
+
+    The following are common calling conventions:
+
+    1. ``Y = cdist(XA, XB, 'euclidean')``
+
+       Computes the distance between :math:`m` points using
+       Euclidean distance (2-norm) as the distance metric between the
+       points. The points are arranged as :math:`m`
+       :math:`n`-dimensional row vectors in the matrix X.
+
+    2. ``Y = cdist(XA, XB, 'minkowski', p)``
+
+       Computes the distances using the Minkowski distance
+       :math:`||u-v||_p` (:math:`p`-norm) where :math:`p \geq 1`.
+
+    3. ``Y = cdist(XA, XB, 'cityblock')``
+
+       Computes the city block or Manhattan distance between the
+       points.
+
+    4. ``Y = cdist(XA, XB, 'seuclidean', V=None)``
+
+       Computes the standardized Euclidean distance. The standardized
+       Euclidean distance between two n-vectors ``u`` and ``v`` is
+
+       .. math::
+
+          \sqrt{\sum {(u_i-v_i)^2 / V[x_i]}}.
+
+       V is the variance vector; V[i] is the variance computed over all
+       the i'th components of the points. If not passed, it is
+       automatically computed.
+
+    5. ``Y = cdist(XA, XB, 'sqeuclidean')``
+
+       Computes the squared Euclidean distance :math:`||u-v||_2^2` between
+       the vectors.
+
+    6. ``Y = cdist(XA, XB, 'cosine')``
+
+       Computes the cosine distance between vectors u and v,
+
+       .. math::
+
+          \frac{1 - uv^T}
+               {{|u|}_2 {|v|}_2}
+
+       where :math:`|*|_2` is the 2-norm of its argument *.
+
+    7. ``Y = cdist(XA, XB, 'correlation')``
+
+       Computes the correlation distance between vectors u and v. This is
+
+       .. math::
+
+          \frac{1 - (u - n{|u|}_1){(v - n{|v|}_1)}^T}
+               {{|(u - n{|u|}_1)|}_2 {|(v - n{|v|}_1)|}^T}
+
+       where :math:`|*|_1` is the Manhattan (or 1-norm) of its
+       argument, and :math:`n` is the common dimensionality of the
+       vectors.
+
+    8. ``Y = cdist(XA, XB, 'hamming')``
+
+       Computes the normalized Hamming distance, or the proportion of
+       those vector elements between two n-vectors ``u`` and ``v``
+       which disagree. To save memory, the matrix ``X`` can be of type
+       boolean.
+
+    9. ``Y = cdist(XA, XB, 'jaccard')``
+
+       Computes the Jaccard distance between the points. Given two
+       vectors, ``u`` and ``v``, the Jaccard distance is the
+       proportion of those elements ``u[i]`` and ``v[i]`` that
+       disagree where at least one of them is non-zero.
+
+    10. ``Y = cdist(XA, XB, 'chebyshev')``
+
+        Computes the Chebyshev distance between the points. The
+        Chebyshev distance between two n-vectors ``u`` and ``v`` is the
+        maximum norm-1 distance between their respective elements. More
+        precisely, the distance is given by
+
+        .. math::
+
+           d(u,v) = \max_i {|u_i-v_i|}.
+
+    11. ``Y = cdist(XA, XB, 'canberra')``
+
+        Computes the Canberra distance between the points. The
+        Canberra distance between two points ``u`` and ``v`` is
+
+        .. math::
+
+           d(u,v) = \sum_u \frac{|u_i-v_i|}
+                                {(|u_i|+|v_i|)}
+
+
+    12. ``Y = cdist(XA, XB, 'braycurtis')``
+
+        Computes the Bray-Curtis distance between the points. The
+        Bray-Curtis distance between two points ``u`` and ``v`` is
+
+
+        .. math::
+
+           d(u,v) = \frac{\sum_i (u_i-v_i)}
+                         {\sum_i (u_i+v_i)}
+
+    13. ``Y = cdist(XA, XB, 'mahalanobis', VI=None)``
+
+        Computes the Mahalanobis distance between the points. The
+        Mahalanobis distance between two points ``u`` and ``v`` is
+        :math:`(u-v)(1/V)(u-v)^T` where :math:`(1/V)` (the ``VI``
+        variable) is the inverse covariance. If ``VI`` is not None,
+        ``VI`` will be used as the inverse covariance matrix.
+
+    14. ``Y = cdist(XA, XB, 'yule')``
+
+        Computes the Yule distance between the boolean
+        vectors. (see yule function documentation)
+
+    15. ``Y = cdist(XA, XB, 'matching')``
+
+        Computes the matching distance between the boolean
+        vectors. (see matching function documentation)
+
+    16. ``Y = cdist(XA, XB, 'dice')``
+
+        Computes the Dice distance between the boolean vectors. (see
+        dice function documentation)
+
+    17. ``Y = cdist(XA, XB, 'kulsinski')``
+
+        Computes the Kulsinski distance between the boolean
+        vectors. (see kulsinski function documentation)
+
+    18. ``Y = cdist(XA, XB, 'rogerstanimoto')``
+
+        Computes the Rogers-Tanimoto distance between the boolean
+        vectors. (see rogerstanimoto function documentation)
+
+    19. ``Y = cdist(XA, XB, 'russellrao')``
+
+        Computes the Russell-Rao distance between the boolean
+        vectors. (see russellrao function documentation)
+
+    20. ``Y = cdist(XA, XB, 'sokalmichener')``
+
+        Computes the Sokal-Michener distance between the boolean
+        vectors. (see sokalmichener function documentation)
+
+    21. ``Y = cdist(XA, XB, 'sokalsneath')``
+
+        Computes the Sokal-Sneath distance between the vectors. (see
+        sokalsneath function documentation)
+
+
+    22. ``Y = cdist(XA, XB, 'wminkowski')``
+
+        Computes the weighted Minkowski distance between the
+        vectors. (see wminkowski function documentation)
+
+    23. ``Y = cdist(XA, XB, f)``
+
+        Computes the distance between all pairs of vectors in X
+        using the user supplied 2-arity function f. For example,
+        Euclidean distance between the vectors could be computed
+        as follows::
+
+          dm = cdist(XA, XB, (lambda u, v: np.sqrt(((u-v)*(u-v).T).sum())))
+
+        Note that you should avoid passing a reference to one of
+        the distance functions defined in this library. For example,::
+
+          dm = cdist(XA, XB, sokalsneath)
+
+        would calculate the pair-wise distances between the vectors in
+        X using the Python function sokalsneath. This would result in
+        sokalsneath being called :math:`{n \choose 2}` times, which
+        is inefficient. Instead, the optimized C version is more
+        efficient, and we call it using the following syntax.::
+
+          dm = cdist(XA, XB, 'sokalsneath')
+
+    Parameters
+    ----------
+    XA : ndarray
+        An :math:`m_A` by :math:`n` array of :math:`m_A`
+        original observations in an :math:`n`-dimensional space.
+    XB : ndarray
+        An :math:`m_B` by :math:`n` array of :math:`m_B`
+        original observations in an :math:`n`-dimensional space.
+    metric : string or function
+        The distance metric to use. The distance function can
+        be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
+        'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
+        'jaccard', 'kulsinski', 'mahalanobis', 'matching',
+        'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
+        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski',
+        'yule'.
+    w : ndarray
+        The weight vector (for weighted Minkowski).
+    p : double
+        The p-norm to apply (for Minkowski, weighted and unweighted)
+    V : ndarray
+        The variance vector (for standardized Euclidean).
+    VI : ndarray
+        The inverse of the covariance matrix (for Mahalanobis).
+
+
+    Returns
+    -------
+    Y : ndarray
+        A :math:`m_A` by :math:`m_B` distance matrix.
+    """
+
+#         21. Y = cdist(XA, XB, 'test_Y')
+#
+#           Computes the distance between all pairs of vectors in X
+#           using the distance metric Y but with a more succinct,
+#           verifiable, but less efficient implementation.
+
+    XA = np.asarray(XA, order='c')
+    XB = np.asarray(XB, order='c')
+
+    #if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
+    #    raise TypeError('Floating point arrays must be 64-bit (got %r).' %
+    #    (X.dtype.type,))
+
+    # The C code doesn't do striding.
+    [XA] = _copy_arrays_if_base_present([_convert_to_double(XA)])
+    [XB] = _copy_arrays_if_base_present([_convert_to_double(XB)])
+
+    s = XA.shape
+    sB = XB.shape
+
+    if len(s) != 2:
+        raise ValueError('XA must be a 2-dimensional array.')
+    if len(sB) != 2:
+        raise ValueError('XB must be a 2-dimensional array.')
+    if s[1] != sB[1]:
+        raise ValueError('XA and XB must have the same number of columns '
+                         '(i.e. feature dimension.)')
+
+    mA = s[0]
+    mB = sB[0]
+    n = s[1]
+    dm = np.zeros((mA, mB), dtype=np.double)
+
+    if callable(metric):
+        if metric == minkowski:
+            for i in xrange(0, mA):
+                for j in xrange(0, mB):
+                    dm[i, j] = minkowski(XA[i, :], XB[j, :], p)
+        elif metric == wminkowski:
+            for i in xrange(0, mA):
+                for j in xrange(0, mB):
+                    dm[i, j] = wminkowski(XA[i, :], XB[j, :], p, w)
+        elif metric == seuclidean:
+            for i in xrange(0, mA):
+                for j in xrange(0, mB):
+                    dm[i, j] = seuclidean(XA[i, :], XB[j, :], V)
+        elif metric == mahalanobis:
+            for i in xrange(0, mA):
+                for j in xrange(0, mB):
+                    dm[i, j] = mahalanobis(XA[i, :], XB[j, :], VI)  # inverse covariance VI, not V
+        else:
+            for i in xrange(0, mA):
+                for j in xrange(0, mB):
+                    dm[i, j] = metric(XA[i, :], XB[j, :])
+    elif isinstance(metric, basestring):
+        mstr = metric.lower()
+
+        #if XA.dtype != np.double and \
+        #       (mstr != 'hamming' and mstr != 'jaccard'):
+        #    TypeError('A double array must be passed.')
+        if mstr in set(['euclidean', 'euclid', 'eu', 'e']):
+            _distance_wrap.cdist_euclidean_wrap(_convert_to_double(XA),
+                                                _convert_to_double(XB), dm)
+        elif mstr in set(['sqeuclidean', 'sqe', 'sqeuclid']):
+            _distance_wrap.cdist_euclidean_wrap(_convert_to_double(XA),
+                                                _convert_to_double(XB), dm)
+            dm **= 2.0
+        elif mstr in set(['cityblock', 'cblock', 'cb', 'c']):
+            _distance_wrap.cdist_city_block_wrap(_convert_to_double(XA),
+                                                 _convert_to_double(XB), dm)
+        elif mstr in set(['hamming', 'hamm', 'ha', 'h']):
+            if XA.dtype == bool:
+                _distance_wrap.cdist_hamming_bool_wrap(_convert_to_bool(XA),
+                                                       _convert_to_bool(XB),
+                                                       dm)
+            else:
+                _distance_wrap.cdist_hamming_wrap(_convert_to_double(XA),
+                                                  _convert_to_double(XB), dm)
+        elif mstr in set(['jaccard', 'jacc', 'ja', 'j']):
+            if XA.dtype == bool:
+                _distance_wrap.cdist_jaccard_bool_wrap(_convert_to_bool(XA),
+                                                       _convert_to_bool(XB),
+                                                       dm)
+            else:
+                _distance_wrap.cdist_jaccard_wrap(_convert_to_double(XA),
+                                                  _convert_to_double(XB), dm)
+        elif mstr in set(['chebychev', 'chebyshev', 'cheby', 'cheb', 'ch']):
+            _distance_wrap.cdist_chebyshev_wrap(_convert_to_double(XA),
+                                                _convert_to_double(XB), dm)
+        elif mstr in set(['minkowski', 'mi', 'm', 'pnorm']):
+            _distance_wrap.cdist_minkowski_wrap(_convert_to_double(XA),
+                                                _convert_to_double(XB), dm, p)
+        elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']):
+            _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(XA),
+                                                         _convert_to_double(XB),
+                                                         dm, p,
+                                                         _convert_to_double(w))
+        elif mstr in set(['seuclidean', 'se', 's']):
+            if V is not None:
+                V = np.asarray(V, order='c')
+                if type(V) != np.ndarray:
+                    raise TypeError('Variance vector V must be a numpy array')
+                if V.dtype != np.double:
+                    raise TypeError('Variance vector V must contain doubles.')
+                if len(V.shape) != 1:
+                    raise ValueError('Variance vector V must be '
+                                     'one-dimensional.')
+                if V.shape[0] != n:
+                    raise ValueError('Variance vector V must be of the same '
+                                     'dimension as the vectors on which the '
+                                     'distances are computed.')
+                # The C code doesn't do striding.
+                [VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
+            else:
+                X = np.vstack([XA, XB])
+                VV = np.var(X, axis=0, ddof=1)
+                X = None
+                del X
+            _distance_wrap.cdist_seuclidean_wrap(_convert_to_double(XA),
+                                                 _convert_to_double(XB), VV, dm)
+        # Need to test whether vectorized cosine works better.
+        # Find out: Is there a dot subtraction operator so I can
+        # subtract matrices in a similar way to multiplying them?
+        # Need to get rid of as much unnecessary C code as possible.
+        elif mstr in set(['cosine', 'cos']):
+            normsA = np.sqrt(np.sum(XA * XA, axis=1))
+            normsB = np.sqrt(np.sum(XB * XB, axis=1))
+            _distance_wrap.cdist_cosine_wrap(_convert_to_double(XA),
+                                             _convert_to_double(XB), dm,
+                                             normsA,
+                                             normsB)
+        elif mstr in set(['correlation', 'co']):
+            XA2 = XA - XA.mean(1)[:, np.newaxis]
+            XB2 = XB - XB.mean(1)[:, np.newaxis]
+            #X2 = X - np.matlib.repmat(np.mean(X, axis=1).reshape(m, 1), 1, n)
+            normsA = np.sqrt(np.sum(XA2 * XA2, axis=1))
+            normsB = np.sqrt(np.sum(XB2 * XB2, axis=1))
+            _distance_wrap.cdist_cosine_wrap(_convert_to_double(XA2),
+                                             _convert_to_double(XB2),
+                                             _convert_to_double(dm),
+                                             _convert_to_double(normsA),
+                                             _convert_to_double(normsB))
+        elif mstr in set(['mahalanobis', 'mahal', 'mah']):
+            if VI is not None:
+                VI = _convert_to_double(np.asarray(VI, order='c'))
+                if type(VI) != np.ndarray:
+                    raise TypeError('VI must be a numpy array.')
+                if VI.dtype != np.double:
+                    raise TypeError('The array must contain 64-bit floats.')
+                [VI] = _copy_arrays_if_base_present([VI])
+            else:
+                X = np.vstack([XA, XB])
+                V = np.cov(X.T)
+                X = None
+                del X
+                VI = _convert_to_double(np.linalg.inv(V).T.copy())
+            # (u-v)V^(-1)(u-v)^T
+            _distance_wrap.cdist_mahalanobis_wrap(_convert_to_double(XA),
+                                                  _convert_to_double(XB),
+                                                  VI, dm)
+        elif mstr == 'canberra':
+            _distance_wrap.cdist_canberra_wrap(_convert_to_double(XA),
+                                               _convert_to_double(XB), dm)
+        elif mstr == 'braycurtis':
+            _distance_wrap.cdist_bray_curtis_wrap(_convert_to_double(XA),
+                                                  _convert_to_double(XB), dm)
+        elif mstr == 'yule':
+            _distance_wrap.cdist_yule_bool_wrap(_convert_to_bool(XA),
+                                                _convert_to_bool(XB), dm)
+        elif mstr == 'matching':
+            _distance_wrap.cdist_matching_bool_wrap(_convert_to_bool(XA),
+                                                    _convert_to_bool(XB), dm)
+        elif mstr == 'kulsinski':
+            _distance_wrap.cdist_kulsinski_bool_wrap(_convert_to_bool(XA),
+                                                     _convert_to_bool(XB), dm)
+        elif mstr == 'dice':
+            _distance_wrap.cdist_dice_bool_wrap(_convert_to_bool(XA),
+                                                _convert_to_bool(XB), dm)
+        elif mstr == 'rogerstanimoto':
+            _distance_wrap.cdist_rogerstanimoto_bool_wrap(_convert_to_bool(XA),
+                                                          _convert_to_bool(XB),
+                                                          dm)
+        elif mstr == 'russellrao':
+            _distance_wrap.cdist_russellrao_bool_wrap(_convert_to_bool(XA),
+                                                      _convert_to_bool(XB), dm)
+        elif mstr == 'sokalmichener':
+            _distance_wrap.cdist_sokalmichener_bool_wrap(_convert_to_bool(XA),
+                                                         _convert_to_bool(XB),
+                                                         dm)
+        elif mstr == 'sokalsneath':
+            _distance_wrap.cdist_sokalsneath_bool_wrap(_convert_to_bool(XA),
+                                                       _convert_to_bool(XB),
+                                                       dm)
+        elif metric == 'test_euclidean':
+            dm = cdist(XA, XB, euclidean)
+        elif metric == 'test_seuclidean':
+            if V is None:
+                V = np.var(np.vstack([XA, XB]), axis=0, ddof=1)
+            else:
+                V = np.asarray(V, order='c')
+            dm = cdist(XA, XB, lambda u, v: seuclidean(u, v, V))
+        elif metric == 'test_sqeuclidean':
+            dm = cdist(XA, XB, lambda u, v: sqeuclidean(u, v))
+        elif metric == 'test_braycurtis':
+            dm = cdist(XA, XB, braycurtis)
+        elif metric == 'test_mahalanobis':
+            if VI is None:
+                X = np.vstack([XA, XB])
+                V = np.cov(X.T)
+                VI = np.linalg.inv(V)
+                X = None
+                del X
+            else:
+                VI = np.asarray(VI, order='c')
+            [VI] = _copy_arrays_if_base_present([VI])
+            # (u-v)V^(-1)(u-v)^T
+            dm = cdist(XA, XB, (lambda u, v: mahalanobis(u, v, VI)))
+        elif metric == 'test_canberra':
+            dm = cdist(XA, XB, canberra)
+        elif metric == 'test_cityblock':
+            dm = cdist(XA, XB, cityblock)
+        elif metric == 'test_minkowski':
+            dm = cdist(XA, XB, minkowski, p=p)
+        elif metric == 'test_wminkowski':
+            dm = cdist(XA, XB, wminkowski, p=p, w=w)
+        elif metric == 'test_cosine':
+            dm = cdist(XA, XB, cosine)
+        elif metric == 'test_correlation':
+            dm = cdist(XA, XB, correlation)
+        elif metric == 'test_hamming':
+            dm = cdist(XA, XB, hamming)
+        elif metric == 'test_jaccard':
+            dm = cdist(XA, XB, jaccard)
+        elif metric == 'test_chebyshev' or metric == 'test_chebychev':
+            dm = cdist(XA, XB, chebyshev)
+        elif metric == 'test_yule':
+            dm = cdist(XA, XB, yule)
+        elif metric == 'test_matching':
+            dm = cdist(XA, XB, matching)
+        elif metric == 'test_dice':
+            dm = cdist(XA, XB, dice)
+        elif metric == 'test_kulsinski':
+            dm = cdist(XA, XB, kulsinski)
+        elif metric == 'test_rogerstanimoto':
+            dm = cdist(XA, XB, rogerstanimoto)
+        elif metric == 'test_russellrao':
+            dm = cdist(XA, XB, russellrao)
+        elif metric == 'test_sokalsneath':
+            dm = cdist(XA, XB, sokalsneath)
+        elif metric == 'test_sokalmichener':
+            dm = cdist(XA, XB, sokalmichener)
+        else:
+            raise ValueError('Unknown Distance Metric: %s' % mstr)
+    else:
+        raise TypeError('2nd argument metric must be a string identifier '
+                        'or a function.')
+    return dm
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/kdtree.py
--- /dev/null
+++ b/yt/utilities/spatial/kdtree.py
@@ -0,0 +1,881 @@
+# Copyright Anne M. Archibald 2008
+# Released under the scipy license
+import sys
+import numpy as np
+from heapq import heappush, heappop
+#import scipy.sparse
+
+__all__ = ['minkowski_distance_p', 'minkowski_distance',
+ 'distance_matrix',
+ 'Rectangle', 'KDTree']
+
+
+def minkowski_distance_p(x, y, p=2):
+    """
+    Return the L**p distance between x and y raised to the p-th power.
+
+    The p-th root is deliberately not taken, which saves work when only
+    relative distances matter. For p equal to 1 or infinity the result
+    is already the true L**p distance.
+
+    Parameters
+    ----------
+    x : array_like, M by K
+
+    y : array_like, N by K
+
+    p : float, 1 <= p <= infinity
+        Which Minkowski p-norm to use.
+
+    Examples
+    --------
+    >>> minkowski_distance_p([[0,0],[0,0]], [[1,1],[0,1]])
+    array([2, 1])
+
+    """
+    gaps = np.abs(np.asarray(y) - np.asarray(x))
+    if p == np.inf:
+        # maximum-coordinate-difference norm
+        return np.amax(gaps, axis=-1)
+    if p == 1:
+        return np.sum(gaps, axis=-1)
+    # general case: sum of p-th powers along the last axis
+    return np.sum(gaps**p, axis=-1)
+
+def minkowski_distance(x, y, p=2):
+    """
+    Return the L**p distance between x and y.
+
+    Parameters
+    ----------
+    x : array_like, M by K
+
+    y : array_like, N by K
+
+    p : float, 1 <= p <= infinity
+        Which Minkowski p-norm to use.
+
+    Examples
+    --------
+    >>> minkowski_distance([[0,0],[0,0]], [[1,1],[0,1]])
+    array([ 1.41421356,  1.        ])
+
+    """
+    x, y = np.asarray(x), np.asarray(y)
+    dist_p = minkowski_distance_p(x, y, p)
+    if p in (1, np.inf):
+        # for these norms the p-th power already is the distance
+        return dist_p
+    return dist_p**(1./p)
+
+class Rectangle(object):
+    """Hyperrectangle class.
+
+    Represents a Cartesian product of intervals (``mins[i]`` to ``maxes[i]`` on axis i).
+    """
+    def __init__(self, maxes, mins):
+        """Construct a hyperrectangle; the two corners are sorted per axis, so either order works."""
+        self.maxes = np.maximum(maxes,mins).astype(float)  # builtin float: np.float alias is gone in new numpy
+        self.mins = np.minimum(maxes,mins).astype(float)
+        self.m, = self.maxes.shape  # number of dimensions; corners must be 1-d
+
+    def __repr__(self):
+        return "<Rectangle %s>" % list(zip(self.mins, self.maxes))  # list(): zip is lazy on Python 3
+
+    def volume(self):
+        """Total volume."""
+        return np.prod(self.maxes-self.mins)
+
+    def split(self, d, split):
+        """Produce two hyperrectangles by splitting along axis d at value split.
+
+        In general, if you need to compute maximum and minimum
+        distances to the children, it can be done more efficiently
+        by updating the maximum and minimum distances to the parent.
+        """ # FIXME: do this
+        mid = np.copy(self.maxes)
+        mid[d] = split
+        less = Rectangle(self.mins, mid)  # lower part: upper bound on axis d becomes split
+        mid = np.copy(self.mins)
+        mid[d] = split
+        greater = Rectangle(mid, self.maxes)  # upper part: lower bound on axis d becomes split
+        return less, greater
+
+    def min_distance_point(self, x, p=2.):
+        """Compute the minimum distance between x and a point in the hyperrectangle."""
+        return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-x,x-self.maxes)),p)
+
+    def max_distance_point(self, x, p=2.):
+        """Compute the maximum distance between x and a point in the hyperrectangle."""
+        return minkowski_distance(0, np.maximum(self.maxes-x,x-self.mins),p)
+
+    def min_distance_rectangle(self, other, p=2.):
+        """Compute the minimum distance between points in the two hyperrectangles."""
+        return minkowski_distance(0, np.maximum(0,np.maximum(self.mins-other.maxes,other.mins-self.maxes)),p)
+
+    def max_distance_rectangle(self, other, p=2.):
+        """Compute the maximum distance between points in the two hyperrectangles."""
+        return minkowski_distance(0, np.maximum(self.maxes-other.mins,other.maxes-self.mins),p)
+
+
+class KDTree(object):
+ """
+ kd-tree for quick nearest-neighbor lookup
+
+ This class provides an index into a set of k-dimensional points
+ which can be used to rapidly look up the nearest neighbors of any
+ point.
+
+ The algorithm used is described in Maneewongvatana and Mount 1999.
+ The general idea is that the kd-tree is a binary tree, each of whose
+ nodes represents an axis-aligned hyperrectangle. Each node specifies
+ an axis and splits the set of points based on whether their coordinate
+ along that axis is greater than or less than a particular value.
+
+ During construction, the axis and splitting point are chosen by the
+ "sliding midpoint" rule, which ensures that the cells do not all
+ become long and thin.
+
+ The tree can be queried for the r closest neighbors of any given point
+ (optionally returning only those within some maximum distance of the
+ point). It can also be queried, with a substantial gain in efficiency,
+ for the r approximate closest neighbors.
+
+ For large dimensions (20 is already large) do not expect this to run
+ significantly faster than brute force. High-dimensional nearest-neighbor
+ queries are a substantial open problem in computer science.
+
+ The tree also supports all-neighbors queries, both with arrays of points
+ and with other kd-trees. These do use a reasonably efficient algorithm,
+ but the kd-tree is not necessarily the best data structure for this
+ sort of calculation.
+
+ """
+    def __init__(self, data, leafsize=10):
+        """Build a kd-tree over a set of points.
+
+        Parameters
+        ----------
+        data : array_like, shape (n,k)
+            The points to index. The array is referenced rather than
+            copied, so changing it afterwards invalidates the tree.
+        leafsize : positive int
+            Node size at or below which the builder stops splitting
+            and queries fall back to brute force.
+        """
+        self.data = np.asarray(data)
+        self.n, self.m = self.data.shape
+        self.leafsize = int(leafsize)
+        if self.leafsize < 1:
+            raise ValueError("leafsize must be at least 1")
+        self.maxes = self.data.max(axis=0)
+        self.mins = self.data.min(axis=0)
+
+        self.tree = self.__build(np.arange(self.n), self.maxes, self.mins)
+
+    class node(object):
+        if sys.version_info[0] >= 3:  # Python 3 needs an explicit ordering for heap tie-breaks
+            def __lt__(self, other): return id(self) < id(other)
+            def __gt__(self, other): return id(self) > id(other)
+            def __le__(self, other): return id(self) <= id(other)
+            def __ge__(self, other): return id(self) >= id(other)
+            def __eq__(self, other): return id(self) == id(other)
+
+    class leafnode(node):
+        def __init__(self, idx):
+            self.idx = idx  # indices of the points held by this leaf
+            self.children = len(idx)  # number of points in the leaf
+
+    class innernode(node):
+        def __init__(self, split_dim, split, less, greater):
+            self.split_dim = split_dim  # axis the node splits on
+            self.split = split  # coordinate value of the split
+            self.less = less  # child subtree on the low side
+            self.greater = greater  # child subtree on the high side
+            self.children = less.children+greater.children  # total points below this node
+
+    def __build(self, idx, maxes, mins):  # recursively build the subtree for points idx inside box [mins, maxes]
+        if len(idx)<=self.leafsize:
+            return KDTree.leafnode(idx)
+        else:
+            data = self.data[idx]
+            #maxes = np.amax(data,axis=0)
+            #mins = np.amin(data,axis=0)
+            d = np.argmax(maxes-mins)  # split along the axis where the box is widest
+            maxval = maxes[d]
+            minval = mins[d]
+            if maxval==minval:
+                # all points are identical; warn user?
+                return KDTree.leafnode(idx)
+            data = data[:,d]  # from here on only the coordinate along axis d matters
+
+            # sliding midpoint rule; see Maneewongvatana and Mount 1999
+            # for arguments that this is a good idea.
+            split = (maxval+minval)/2
+            less_idx = np.nonzero(data<=split)[0]
+            greater_idx = np.nonzero(data>split)[0]
+            if len(less_idx)==0:  # nothing below the midpoint: slide the split to the smallest value
+                split = np.amin(data)
+                less_idx = np.nonzero(data<=split)[0]
+                greater_idx = np.nonzero(data>split)[0]
+            if len(greater_idx)==0:  # nothing above: slide the split to the largest value
+                split = np.amax(data)
+                less_idx = np.nonzero(data<split)[0]
+                greater_idx = np.nonzero(data>=split)[0]
+            if len(less_idx)==0:
+                # _still_ zero? all must have the same value
+                if not np.all(data==data[0]):
+                    raise ValueError("Troublesome data array: %s" % data)
+                split = data[0]
+                less_idx = np.arange(len(data)-1)  # put all but the last point on the low side
+                greater_idx = np.array([len(data)-1])
+
+            lessmaxes = np.copy(maxes)
+            lessmaxes[d] = split  # low child's box is capped at the split on axis d
+            greatermins = np.copy(mins)
+            greatermins[d] = split  # high child's box starts at the split on axis d
+            return KDTree.innernode(d, split,
+                    self.__build(idx[less_idx],lessmaxes,mins),
+                    self.__build(idx[greater_idx],maxes,greatermins))
+
+    def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+        """Return a sorted list of (distance, index) pairs: up to k neighbors of the single point x."""
+        side_distances = np.maximum(0,np.maximum(x-self.maxes,self.mins-x))
+        if p!=np.inf:
+            side_distances**=p  # finite p: work with p-th powers, roots are taken at the end
+            min_distance = np.sum(side_distances)
+        else:
+            min_distance = np.amax(side_distances)
+
+        # priority queue for chasing nodes
+        # entries are:
+        #  minimum distance between the cell and the target
+        #  distances between the nearest side of the cell and the target
+        #  the head node of the cell
+        q = [(min_distance,
+              tuple(side_distances),
+              self.tree)]
+        # priority queue for the nearest neighbors
+        # furthest known neighbor first
+        # entries are (-distance**p, i)
+        neighbors = []
+
+        if eps==0:
+            epsfac=1
+        elif p==np.inf:
+            epsfac = 1./(1+eps)  # float literal: integer eps must not truncate to 0 on Python 2
+        else:
+            epsfac = 1./(1+eps)**p
+
+        if p!=np.inf and distance_upper_bound!=np.inf:
+            distance_upper_bound = distance_upper_bound**p
+
+        while q:
+            min_distance, side_distances, node = heappop(q)
+            if isinstance(node, KDTree.leafnode):
+                # brute-force
+                data = self.data[node.idx]
+                ds = minkowski_distance_p(data,x[np.newaxis,:],p)
+                for i in range(len(ds)):
+                    if ds[i]<distance_upper_bound:
+                        if len(neighbors)==k:
+                            heappop(neighbors)  # drop the current furthest neighbor
+                        heappush(neighbors, (-ds[i], node.idx[i]))
+                        if len(neighbors)==k:
+                            distance_upper_bound = -neighbors[0][0]  # shrink the bound to the furthest kept
+            else:
+                # we don't push cells that are too far onto the queue at all,
+                # but since the distance_upper_bound decreases, we might get
+                # here even if the cell's too far
+                if min_distance>distance_upper_bound*epsfac:
+                    # since this is the nearest cell, we're done, bail out
+                    break
+                # compute minimum distances to the children and push them on
+                if x[node.split_dim]<node.split:
+                    near, far = node.less, node.greater
+                else:
+                    near, far = node.greater, node.less
+
+                # near child is at the same distance as the current node
+                heappush(q,(min_distance, side_distances, near))
+
+                # far child is further by an amount depending only
+                # on the split value
+                sd = list(side_distances)
+                if p == np.inf:
+                    min_distance = max(min_distance, abs(node.split-x[node.split_dim]))
+                elif p == 1:
+                    sd[node.split_dim] = np.abs(node.split-x[node.split_dim])
+                    min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+                else:
+                    sd[node.split_dim] = np.abs(node.split-x[node.split_dim])**p
+                    min_distance = min_distance - side_distances[node.split_dim] + sd[node.split_dim]
+
+                # far child might be too far, if so, don't bother pushing it
+                if min_distance<=distance_upper_bound*epsfac:
+                    heappush(q,(min_distance, tuple(sd), far))
+
+        if p==np.inf:
+            return sorted([(-d,i) for (d,i) in neighbors])
+        else:
+            return sorted([((-d)**(1./p),i) for (d,i) in neighbors])
+
+ def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
+ """
+ Query the kd-tree for nearest neighbors
+
+ Parameters
+ ----------
+ x : array_like, last dimension self.m
+ An array of points to query.
+ k : integer
+ The number of nearest neighbors to return.
+ eps : nonnegative float
+ Return approximate nearest neighbors; the kth returned value
+ is guaranteed to be no further than (1+eps) times the
+ distance to the real kth nearest neighbor.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use.
+ 1 is the sum-of-absolute-values "Manhattan" distance
+ 2 is the usual Euclidean distance
+ infinity is the maximum-coordinate-difference distance
+ distance_upper_bound : nonnegative float
+ Return only neighbors within this distance. This is used to prune
+ tree searches, so if you are doing a series of nearest-neighbor
+ queries, it may help to supply the distance to the nearest neighbor
+ of the most recent point.
+
+ Returns
+ -------
+ d : array of floats
+ The distances to the nearest neighbors.
+ If x has shape tuple+(self.m,), then d has shape tuple if
+ k is one, or tuple+(k,) if k is larger than one. Missing
+ neighbors are indicated with infinite distances. If k is None,
+ then d is an object array of shape tuple, containing lists
+ of distances. In either case the hits are sorted by distance
+ (nearest first).
+ i : array of integers
+ The locations of the neighbors in self.data. i is the same
+ shape as d.
+
+ Examples
+ --------
+ >>> from scipy.spatial import KDTree
+ >>> x, y = np.mgrid[0:5, 2:8]
+ >>> tree = KDTree(zip(x.ravel(), y.ravel()))
+ >>> tree.data
+ array([[0, 2],
+ [0, 3],
+ [0, 4],
+ [0, 5],
+ [0, 6],
+ [0, 7],
+ [1, 2],
+ [1, 3],
+ [1, 4],
+ [1, 5],
+ [1, 6],
+ [1, 7],
+ [2, 2],
+ [2, 3],
+ [2, 4],
+ [2, 5],
+ [2, 6],
+ [2, 7],
+ [3, 2],
+ [3, 3],
+ [3, 4],
+ [3, 5],
+ [3, 6],
+ [3, 7],
+ [4, 2],
+ [4, 3],
+ [4, 4],
+ [4, 5],
+ [4, 6],
+ [4, 7]])
+ >>> pts = np.array([[0, 0], [2.1, 2.9]])
+ >>> tree.query(pts)
+ (array([ 2. , 0.14142136]), array([ 0, 13]))
+
+ """
+ x = np.asarray(x)
+ if np.shape(x)[-1] != self.m:
+ raise ValueError("x must consist of vectors of length %d but has shape %s" % (self.m, np.shape(x)))
+ if p<1:
+ raise ValueError("Only p-norms with 1<=p<=infinity permitted")
+ retshape = np.shape(x)[:-1]
+ if retshape!=():
+ if k is None:
+ dd = np.empty(retshape,dtype=np.object)
+ ii = np.empty(retshape,dtype=np.object)
+ elif k>1:
+ dd = np.empty(retshape+(k,),dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape+(k,),dtype=np.int)
+ ii.fill(self.n)
+ elif k==1:
+ dd = np.empty(retshape,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(retshape,dtype=np.int)
+ ii.fill(self.n)
+ else:
+ raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+ for c in np.ndindex(retshape):
+ hits = self.__query(x[c], k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k is None:
+ dd[c] = [d for (d,i) in hits]
+ ii[c] = [i for (d,i) in hits]
+ elif k>1:
+ for j in range(len(hits)):
+ dd[c+(j,)], ii[c+(j,)] = hits[j]
+ elif k==1:
+ if len(hits)>0:
+ dd[c], ii[c] = hits[0]
+ else:
+ dd[c] = np.inf
+ ii[c] = self.n
+ return dd, ii
+ else:
+ hits = self.__query(x, k=k, p=p, distance_upper_bound=distance_upper_bound)
+ if k is None:
+ return [d for (d,i) in hits], [i for (d,i) in hits]
+ elif k==1:
+ if len(hits)>0:
+ return hits[0]
+ else:
+ return np.inf, self.n
+ elif k>1:
+ dd = np.empty(k,dtype=np.float)
+ dd.fill(np.inf)
+ ii = np.empty(k,dtype=np.int)
+ ii.fill(self.n)
+ for j in range(len(hits)):
+ dd[j], ii[j] = hits[j]
+ return dd, ii
+ else:
+ raise ValueError("Requested %s nearest neighbors; acceptable numbers are integers greater than or equal to one, or None")
+
+
+ def __query_ball_point(self, x, r, p=2., eps=0):
+ R = Rectangle(self.maxes, self.mins)
+
+ def traverse_checking(node, rect):
+ if rect.min_distance_point(x, p) > r / (1. + eps):
+ return []
+ elif rect.max_distance_point(x, p) < r * (1. + eps):
+ return traverse_no_checking(node)
+ elif isinstance(node, KDTree.leafnode):
+ d = self.data[node.idx]
+ return node.idx[minkowski_distance(d, x, p) <= r].tolist()
+ else:
+ less, greater = rect.split(node.split_dim, node.split)
+ return traverse_checking(node.less, less) + \
+ traverse_checking(node.greater, greater)
+
+ def traverse_no_checking(node):
+ if isinstance(node, KDTree.leafnode):
+ return node.idx.tolist()
+ else:
+ return traverse_no_checking(node.less) + \
+ traverse_no_checking(node.greater)
+
+ return traverse_checking(self.tree, R)
+
+ def query_ball_point(self, x, r, p=2., eps=0):
+ """Find all points within distance r of point(s) x.
+
+ Parameters
+ ----------
+ x : array_like, shape tuple + (self.m,)
+ The point or points to search for neighbors of.
+ r : positive float
+ The radius of points to return.
+ p : float, optional
+ Which Minkowski p-norm to use. Should be in the range [1, inf].
+ eps : nonnegative float, optional
+ Approximate search. Branches of the tree are not explored if their
+ nearest points are further than ``r / (1 + eps)``, and branches are
+ added in bulk if their furthest points are nearer than
+ ``r * (1 + eps)``.
+
+ Returns
+ -------
+ results : list or array of lists
+ If `x` is a single point, returns a list of the indices of the
+ neighbors of `x`. If `x` is an array of points, returns an object
+ array of shape tuple containing lists of neighbors.
+
+ Notes
+ -----
+ If you have many points whose neighbors you want to find, you may save
+ substantial amounts of time by putting them in a KDTree and using
+ query_ball_tree.
+
+ Examples
+ --------
+ >>> from scipy import spatial
+ >>> x, y = np.mgrid[0:4, 0:4]
+ >>> points = zip(x.ravel(), y.ravel())
+ >>> tree = spatial.KDTree(points)
+ >>> tree.query_ball_point([2, 0], 1)
+ [4, 8, 9, 12]
+
+ """
+ x = np.asarray(x)
+ if x.shape[-1] != self.m:
+ raise ValueError("Searching for a %d-dimensional point in a " \
+ "%d-dimensional KDTree" % (x.shape[-1], self.m))
+ if len(x.shape) == 1:
+ return self.__query_ball_point(x, r, p, eps)
+ else:
+ retshape = x.shape[:-1]
+ result = np.empty(retshape, dtype=np.object)
+ for c in np.ndindex(retshape):
+ result[c] = self.__query_ball_point(x[c], r, p=p, eps=eps)
+ return result
+
+ def query_ball_tree(self, other, r, p=2., eps=0):
+ """Find all pairs of points whose distance is at most r
+
+ Parameters
+ ==========
+
+ other : KDTree
+ The tree containing points to search against
+ r : positive float
+ The maximum distance
+ p : float 1<=p<=infinity
+ Which Minkowski norm to use
+ eps : nonnegative float
+ Approximate search. Branches of the tree are not explored
+ if their nearest points are further than r/(1+eps), and branches
+ are added in bulk if their furthest points are nearer than r*(1+eps).
+
+ Returns
+ =======
+
+ results : list of lists
+ For each element self.data[i] of this tree, results[i] is a list of the
+ indices of its neighbors in other.data.
+ """
+ results = [[] for i in range(self.n)]
+ def traverse_checking(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>r/(1.+eps):
+ return
+ elif rect1.max_distance_rectangle(rect2, p)<r*(1.+eps):
+ traverse_no_checking(node1, node2)
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ d = other.data[node2.idx]
+ for i in node1.idx:
+ results[i] += node2.idx[minkowski_distance(d,self.data[i],p)<=r].tolist()
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1,rect1,node2.less,less)
+ traverse_checking(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse_checking(node1.less,less,node2,rect2)
+ traverse_checking(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1.less,less1,node2.less,less2)
+ traverse_checking(node1.less,less1,node2.greater,greater2)
+ traverse_checking(node1.greater,greater1,node2.less,less2)
+ traverse_checking(node1.greater,greater1,node2.greater,greater2)
+
+ def traverse_no_checking(node1, node2):
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ results[i] += node2.idx.tolist()
+ else:
+ traverse_no_checking(node1, node2.less)
+ traverse_no_checking(node1, node2.greater)
+ else:
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+
+ traverse_checking(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+ return results
+
+ def query_pairs(self, r, p=2., eps=0):
+ """Find all pairs of points whose distance is at most r
+
+ Parameters
+ ==========
+
+ r : positive float
+ The maximum distance
+ p : float 1<=p<=infinity
+ Which Minkowski norm to use
+ eps : nonnegative float
+ Approximate search. Branches of the tree are not explored
+ if their nearest points are further than r/(1+eps), and branches
+ are added in bulk if their furthest points are nearer than r*(1+eps).
+
+ Returns
+ =======
+
+ results : set
+ set of pairs (i,j), i<j, for which the corresponing positions are
+ close.
+
+ """
+ results = set()
+ visited = set()
+ def test_set_visited(node1, node2):
+ i, j = sorted((id(node1),id(node2)))
+ if (i,j) in visited:
+ return True
+ else:
+ visited.add((i,j))
+ return False
+ def traverse_checking(node1, rect1, node2, rect2):
+ if test_set_visited(node1, node2):
+ return
+
+ if id(node2)<id(node1):
+ # This node pair will be visited in the other order
+ #return
+ pass
+
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ d = self.data[node2.idx]
+ for i in node1.idx:
+ for j in node2.idx[minkowski_distance(d,self.data[i],p)<=r]:
+ if i<j:
+ results.add((i,j))
+ elif j<i:
+ results.add((j,i))
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1,rect1,node2.less,less)
+ traverse_checking(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse_checking(node1.less,less,node2,rect2)
+ traverse_checking(node1.greater,greater,node2,rect2)
+ elif rect1.min_distance_rectangle(rect2, p)>r/(1.+eps):
+ return
+ elif rect1.max_distance_rectangle(rect2, p)<r*(1.+eps):
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse_checking(node1.less,less1,node2.less,less2)
+ traverse_checking(node1.less,less1,node2.greater,greater2)
+ traverse_checking(node1.greater,greater1,node2.less,less2)
+ traverse_checking(node1.greater,greater1,node2.greater,greater2)
+
+ def traverse_no_checking(node1, node2):
+ if test_set_visited(node1, node2):
+ return
+
+ if id(node2)<id(node1):
+ # This node pair will be visited in the other order
+ #return
+ pass
+ if isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ for j in node2.idx:
+ if i<j:
+ results.add((i,j))
+ elif j<i:
+ results.add((j,i))
+ else:
+ traverse_no_checking(node1, node2.less)
+ traverse_no_checking(node1, node2.greater)
+ else:
+ traverse_no_checking(node1.less, node2)
+ traverse_no_checking(node1.greater, node2)
+
+ traverse_checking(self.tree, Rectangle(self.maxes, self.mins),
+ self.tree, Rectangle(self.maxes, self.mins))
+ return results
+
+
+ def count_neighbors(self, other, r, p=2.):
+ """Count how many nearby pairs can be formed.
+
+ Count the number of pairs (x1,x2) can be formed, with x1 drawn
+ from self and x2 drawn from other, and where distance(x1,x2,p)<=r.
+ This is the "two-point correlation" described in Gray and Moore 2000,
+ "N-body problems in statistical learning", and the code here is based
+ on their algorithm.
+
+ Parameters
+ ==========
+
+ other : KDTree
+
+ r : float or one-dimensional array of floats
+ The radius to produce a count for. Multiple radii are searched with a single
+ tree traversal.
+ p : float, 1<=p<=infinity
+ Which Minkowski p-norm to use
+
+ Returns
+ =======
+
+ result : integer or one-dimensional array of integers
+ The number of pairs. Note that this is internally stored in a numpy int,
+ and so may overflow if very large (two billion).
+ """
+
+ def traverse(node1, rect1, node2, rect2, idx):
+ min_r = rect1.min_distance_rectangle(rect2,p)
+ max_r = rect1.max_distance_rectangle(rect2,p)
+ c_greater = r[idx]>max_r
+ result[idx[c_greater]] += node1.children*node2.children
+ idx = idx[(min_r<=r[idx]) & (r[idx]<=max_r)]
+ if len(idx)==0:
+ return
+
+ if isinstance(node1,KDTree.leafnode):
+ if isinstance(node2,KDTree.leafnode):
+ ds = minkowski_distance(self.data[node1.idx][:,np.newaxis,:],
+ other.data[node2.idx][np.newaxis,:,:],
+ p).ravel()
+ ds.sort()
+ result[idx] += np.searchsorted(ds,r[idx],side='right')
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1, rect1, node2.less, less, idx)
+ traverse(node1, rect1, node2.greater, greater, idx)
+ else:
+ if isinstance(node2,KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less, less, node2, rect2, idx)
+ traverse(node1.greater, greater, node2, rect2, idx)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2,idx)
+ traverse(node1.less,less1,node2.greater,greater2,idx)
+ traverse(node1.greater,greater1,node2.less,less2,idx)
+ traverse(node1.greater,greater1,node2.greater,greater2,idx)
+ R1 = Rectangle(self.maxes, self.mins)
+ R2 = Rectangle(other.maxes, other.mins)
+ if np.shape(r) == ():
+ r = np.array([r])
+ result = np.zeros(1,dtype=int)
+ traverse(self.tree, R1, other.tree, R2, np.arange(1))
+ return result[0]
+ elif len(np.shape(r))==1:
+ r = np.asarray(r)
+ n, = r.shape
+ result = np.zeros(n,dtype=int)
+ traverse(self.tree, R1, other.tree, R2, np.arange(n))
+ return result
+ else:
+ raise ValueError("r must be either a single value or a one-dimensional array of values")
+
+ def sparse_distance_matrix(self, other, max_distance, p=2.):
+ """Compute a sparse distance matrix
+
+ Computes a distance matrix between two KDTrees, leaving as zero
+ any distance greater than max_distance.
+
+ Parameters
+ ==========
+
+ other : KDTree
+
+ max_distance : positive float
+
+ Returns
+ =======
+
+ result : dok_matrix
+ Sparse matrix representing the results in "dictionary of keys" format.
+ """
+ result = scipy.sparse.dok_matrix((self.n,other.n))
+
+ def traverse(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>max_distance:
+ return
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ for j in node2.idx:
+ d = minkowski_distance(self.data[i],other.data[j],p)
+ if d<=max_distance:
+ result[i,j] = d
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1,rect1,node2.less,less)
+ traverse(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less,less,node2,rect2)
+ traverse(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2)
+ traverse(node1.less,less1,node2.greater,greater2)
+ traverse(node1.greater,greater1,node2.less,less2)
+ traverse(node1.greater,greater1,node2.greater,greater2)
+ traverse(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+
+ return result
+
+
+def distance_matrix(x,y,p=2,threshold=1000000):
+ """
+ Compute the distance matrix.
+
+ Returns the matrix of all pair-wise distances.
+
+ Parameters
+ ----------
+ x : array_like, `M` by `K`
+ TODO: description needed
+ y : array_like, `N` by `K`
+ TODO: description needed
+ p : float, 1 <= p <= infinity
+ Which Minkowski p-norm to use.
+ threshold : positive integer
+ If `M * N * K` > threshold, use a Python loop instead of creating
+ a very large temporary [what? array?].
+
+ Returns
+ -------
+ result : array_like, `M` by `N`
+
+ Examples
+ --------
+ >>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]])
+ array([[ 1. , 1.41421356],
+ [ 1.41421356, 1. ]])
+
+ """
+
+ x = np.asarray(x)
+ m, k = x.shape
+ y = np.asarray(y)
+ n, kk = y.shape
+
+ if k != kk:
+ raise ValueError("x contains %d-dimensional vectors but y contains %d-dimensional vectors" % (k, kk))
+
+ if m*n*k <= threshold:
+ return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
+ else:
+ result = np.empty((m,n),dtype=np.float) #FIXME: figure out the best dtype
+ if m<n:
+ for i in range(m):
+ result[i,:] = minkowski_distance(x[i],y,p)
+ else:
+ for j in range(n):
+ result[:,j] = minkowski_distance(x,y[j],p)
+ return result
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/setup.py
--- /dev/null
+++ b/yt/utilities/spatial/setup.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+from os.path import join
+
+def configuration(parent_package = '', top_path = None):
+ from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs
+ from numpy.distutils.system_info import get_info
+ from distutils.sysconfig import get_python_inc
+
+ config = Configuration('spatial', parent_package, top_path)
+
+ config.add_data_dir('tests')
+
+# qhull_src = ['geom2.c', 'geom.c', 'global.c', 'io.c', 'libqhull.c',
+# 'mem.c', 'merge.c', 'poly2.c', 'poly.c', 'qset.c',
+# 'random.c', 'rboxlib.c', 'stat.c', 'user.c', 'usermem.c',
+# 'userprintf.c']
+
+# config.add_library('qhull',
+# sources=[join('qhull', 'src', x) for x in qhull_src],
+# include_dirs=[get_python_inc(),
+# get_numpy_include_dirs()],
+# # XXX: GCC dependency!
+# #extra_compiler_args=['-fno-strict-aliasing'],
+# )
+
+# lapack = dict(get_info('lapack_opt'))
+# try:
+# libs = ['qhull'] + lapack.pop('libraries')
+# except KeyError:
+# libs = ['qhull']
+# config.add_extension('qhull',
+# sources=['qhull.c'],
+# libraries=libs,
+# **lapack)
+
+ config.add_extension('ckdtree', sources=['ckdtree.c']) # FIXME: cython
+
+ config.add_extension('_distance_wrap',
+ sources=[join('src', 'distance_wrap.c'), join('src', 'distance.c')],
+ include_dirs = [get_numpy_include_dirs()])
+
+ return config
+
+# Script entry point: build/install the package using the metadata below
+# merged with the dictionary form of configuration().
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(maintainer = "SciPy Developers",
+          author = "Anne Archibald",
+          maintainer_email = "scipy-dev at scipy.org",
+          description = "Spatial algorithms and data structures",
+          url = "http://www.scipy.org",
+          license = "SciPy License (BSD Style)",
+          **configuration(top_path='').todict()
+          )
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/setupscons.py
--- /dev/null
+++ b/yt/utilities/spatial/setupscons.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from os.path import join
+
+def configuration(parent_package = '', top_path = None):
+    """Return the numpy.distutils Configuration for the scons-based build.
+
+    Registers the 'tests' data directory and the 'SConstruct' script for
+    the 'spatial' package.
+    """
+    from numpy.distutils.misc_util import Configuration, get_numpy_include_dirs
+    config = Configuration('spatial', parent_package, top_path)
+
+    config.add_data_dir('tests')
+    config.add_sconscript('SConstruct')
+
+    return config
+
+# Script entry point: run setup() with the metadata below merged with the
+# dictionary form of configuration().
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(maintainer = "SciPy Developers",
+          author = "Anne Archibald",
+          maintainer_email = "scipy-dev at scipy.org",
+          description = "Spatial algorithms and data structures",
+          url = "http://www.scipy.org",
+          license = "SciPy License (BSD Style)",
+          **configuration(top_path='').todict()
+          )
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/src/common.h
--- /dev/null
+++ b/yt/utilities/spatial/src/common.h
@@ -0,0 +1,70 @@
+/**
+ * common.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved into new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CLUSTER_COMMON_H
+#define _CLUSTER_COMMON_H
+
+/* Every macro argument is parenthesized so that expression arguments
+ * (e.g. CPY_CEIL_DIV(n, a + b)) expand with the intended precedence.
+ * Arguments are still evaluated more than once; do not pass expressions
+ * with side effects. */
+#define CPY_MAX(_x, _y) (((_x) > (_y)) ? (_x) : (_y))
+#define CPY_MIN(_x, _y) (((_x) < (_y)) ? (_x) : (_y))
+
+/* Number of unordered pairs among _n items: n*(n-1)/2. */
+#define NCHOOSE2(_n) ((_n)*((_n)-1)/2)
+
+/* Bit-flag arrays stored in unsigned char buffers; bit 0 is the most
+ * significant bit of byte 0. */
+#define CPY_BITS_PER_CHAR (sizeof(unsigned char) * 8)
+#define CPY_FLAG_ARRAY_SIZE_BYTES(num_bits) (CPY_CEIL_DIV((num_bits), \
+                                                          CPY_BITS_PER_CHAR))
+#define CPY_GET_BIT(_xx, i) (((_xx)[(i) / CPY_BITS_PER_CHAR] >> \
+                             ((CPY_BITS_PER_CHAR-1) - \
+                              ((i) % CPY_BITS_PER_CHAR))) & 0x1)
+#define CPY_SET_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] |= \
+                             ((0x1) << ((CPY_BITS_PER_CHAR-1) \
+                                        -((i) % CPY_BITS_PER_CHAR))))
+#define CPY_CLEAR_BIT(_xx, i) ((_xx)[(i) / CPY_BITS_PER_CHAR] &= \
+                               ~((0x1) << ((CPY_BITS_PER_CHAR-1) \
+                                           -((i) % CPY_BITS_PER_CHAR))))
+
+/* Integer division rounded up: the quotient is bumped by one whenever the
+ * floating-point quotient differs from the truncated integer quotient. */
+#ifndef CPY_CEIL_DIV
+#define CPY_CEIL_DIV(x, y) ((((double)(x))/(double)(y)) == \
+                            ((double)((x)/(y))) ? ((x)/(y)) : ((x)/(y) + 1))
+#endif
+
+
+/* Debug tracing to stderr, compiled out unless CPY_DEBUG is defined. */
+#ifdef CPY_DEBUG
+#define CPY_DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__)
+#else
+#define CPY_DEBUG_MSG(...)
+#endif
+
+#endif
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/src/distance.c
--- /dev/null
+++ b/yt/utilities/spatial/src/distance.c
@@ -0,0 +1,958 @@
+/**
+ * distance.c
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <Python.h>
+#include <numpy/ndarrayobject.h>
+
+#include <math.h>
+#include <stdlib.h>
+#include "common.h"
+#include "distance.h"
+
+static NPY_INLINE double euclidean_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = u[i] - v[i];
+ s = s + d * d;
+ }
+ return sqrt(s);
+}
+
+static NPY_INLINE double ess_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + d * d;
+ }
+ return s;
+}
+
+static NPY_INLINE double chebyshev_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double d, maxv = 0.0;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ if (d > maxv) {
+ maxv = d;
+ }
+ }
+ return maxv;
+}
+
+static NPY_INLINE double canberra_distance(const double *u, const double *v, int n) {
+ int i;
+ double snum = 0.0, sdenom = 0.0, tot = 0.0;
+ for (i = 0; i < n; i++) {
+ snum = fabs(u[i] - v[i]);
+ sdenom = fabs(u[i]) + fabs(v[i]);
+ if (sdenom > 0.0) {
+ tot += snum / sdenom;
+ }
+ }
+ return tot;
+}
+
+/* Bray-Curtis dissimilarity of two length-n vectors:
+ *   sum_i |u[i] - v[i]|  /  sum_i |u[i] + v[i]|
+ * NOTE(review): the denominator is not guarded; when every u[i] + v[i] is
+ * zero (including n == 0) the division is by zero. */
+static NPY_INLINE double bray_curtis_distance(const double *u, const double *v, int n) {
+  int i;
+  double s1 = 0.0, s2 = 0.0;
+  for (i = 0; i < n; i++) {
+    s1 += fabs(u[i] - v[i]);
+    s2 += fabs(u[i] + v[i]);
+  }
+  return s1 / s2;
+}
+
+static NPY_INLINE double mahalanobis_distance(const double *u, const double *v,
+ const double *covinv, double *dimbuf1,
+ double *dimbuf2, int n) {
+ int i, j;
+ double s;
+ const double *covrow = covinv;
+ for (i = 0; i < n; i++) {
+ dimbuf1[i] = u[i] - v[i];
+ }
+ for (i = 0; i < n; i++) {
+ covrow = covinv + (i * n);
+ s = 0.0;
+ for (j = 0; j < n; j++) {
+ s += dimbuf1[j] * covrow[j];
+ }
+ dimbuf2[i] = s;
+ }
+ s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += dimbuf1[i] * dimbuf2[i];
+ }
+ return sqrt(s);
+}
+
+/* Fraction of the n positions at which u and v hold different values. */
+double hamming_distance(const double *u, const double *v, int n) {
+  int k;
+  double mismatches = 0.0;
+  for (k = 0; k < n; k++) {
+    if (u[k] != v[k]) {
+      mismatches += 1.0;
+    }
+  }
+  return mismatches / (double)n;
+}
+
+static NPY_INLINE double hamming_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s = s + (u[i] != v[i]);
+ }
+ return s / (double)n;
+}
+
+/* Yule dissimilarity of two length-n boolean vectors, computed from the
+ * 2x2 contingency counts (ntt, ntf, nft, nff):
+ *   2 * ntf * nft / (ntt * nff + ntf * nft)
+ * NOTE(review): the denominator is not guarded; it is zero whenever both
+ * products are zero (e.g. u == v with all entries true). */
+static NPY_INLINE double yule_distance_bool(const char *u, const char *v, int n) {
+  int i = 0;
+  int ntt = 0, nff = 0, nft = 0, ntf = 0;
+  for (i = 0; i < n; i++) {
+    ntt += (u[i] && v[i]);
+    ntf += (u[i] && !v[i]);
+    nft += (!u[i] && v[i]);
+    nff += (!u[i] && !v[i]);
+  }
+  return (2.0 * ntf * nft) / (double)(ntt * nff + ntf * nft);
+}
+
+static NPY_INLINE double matching_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ }
+ return (double)(ntf + nft) / (double)(n);
+}
+
+static NPY_INLINE double dice_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ }
+ return (double)(nft + ntf) / (double)(2.0 * ntt + ntf + nft);
+}
+
+
+static NPY_INLINE double rogerstanimoto_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ int ntt = 0, nff = 0, nft = 0, ntf = 0;
+ for (i = 0; i < n; i++) {
+ ntt += (u[i] && v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);
+ nff += (!u[i] && !v[i]);
+ }
+ return (2.0 * (ntf + nft)) / ((double)ntt + nff + (2.0 * (ntf + nft)));
+}
+
+static NPY_INLINE double russellrao_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ /** int nff = 0, nft = 0, ntf = 0;**/
+ int ntt = 0;
+ for (i = 0; i < n; i++) {
+ /** nff += (!u[i] && !v[i]);
+ ntf += (u[i] && !v[i]);
+ nft += (!u[i] && v[i]);**/
+ ntt += (u[i] && v[i]);
+ }
+ /** return (double)(ntf + nft + nff) / (double)n;**/
+ return (double) (n - ntt) / (double) n;
+}
+
+/* Kulsinski dissimilarity of two length-n boolean vectors:
+ *   (ntf + nft - ntt + n) / (ntf + nft + n)
+ * where ntt counts positions true in both vectors and ntf / nft count
+ * positions true in exactly one of them.  The both-false count does not
+ * enter the formula, so it is no longer accumulated. */
+static NPY_INLINE double kulsinski_distance_bool(const char *u, const char *v, int n) {
+  int _i = 0;
+  int ntt = 0, nft = 0, ntf = 0;
+  for (_i = 0; _i < n; _i++) {
+    ntt += (u[_i] && v[_i]);
+    ntf += (u[_i] && !v[_i]);
+    nft += (!u[_i] && v[_i]);
+  }
+  return ((double)(ntf + nft - ntt + n)) / ((double)(ntf + nft + n));
+}
+
+static NPY_INLINE double sokalsneath_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ }
+ return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt);
+}
+
+static NPY_INLINE double sokalmichener_distance_bool(const char *u, const char *v, int n) {
+ int _i = 0;
+ int ntt = 0, nft = 0, ntf = 0, nff = 0;
+ for (_i = 0; _i < n; _i++) {
+ ntt += (u[_i] && v[_i]);
+ nff += (!u[_i] && !v[_i]);
+ ntf += (u[_i] && !v[_i]);
+ nft += (!u[_i] && v[_i]);
+ }
+ return (2.0 * (ntf + nft))/(2.0 * (ntf + nft) + ntt + nff);
+}
+
+static NPY_INLINE double jaccard_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double denom = 0.0, num = 0.0;
+ for (i = 0; i < n; i++) {
+ num += (u[i] != v[i]) && ((u[i] != 0.0) || (v[i] != 0.0));
+ denom += (u[i] != 0.0) || (v[i] != 0.0);
+ }
+ return num / denom;
+}
+
+static NPY_INLINE double jaccard_distance_bool(const char *u, const char *v, int n) {
+ int i = 0;
+ double num = 0.0, denom = 0.0;
+ for (i = 0; i < n; i++) {
+ num += (u[i] != v[i]) && ((u[i] != 0) || (v[i] != 0));
+ denom += (u[i] != 0) || (v[i] != 0);
+ }
+ return num / denom;
+}
+
+static NPY_INLINE double dot_product(const double *u, const double *v, int n) {
+ int i;
+ double s = 0.0;
+ for (i = 0; i < n; i++) {
+ s += u[i] * v[i];
+ }
+ return s;
+}
+
+static NPY_INLINE double cosine_distance(const double *u, const double *v, int n,
+ const double nu, const double nv) {
+ return 1.0 - (dot_product(u, v, n) / (nu * nv));
+}
+
+static NPY_INLINE double seuclidean_distance(const double *var,
+ const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = u[i] - v[i];
+ s = s + (d * d) / var[i];
+ }
+ return sqrt(s);
+}
+
+static NPY_INLINE double city_block_distance(const double *u, const double *v, int n) {
+ int i = 0;
+ double s = 0.0, d;
+ for (i = 0; i < n; i++) {
+ d = fabs(u[i] - v[i]);
+ s = s + d;
+ }
+ return s;
+}
+
+/* Minkowski p-norm distance: (sum_i |u[i] - v[i]|^p)^(1/p). */
+double minkowski_distance(const double *u, const double *v, int n, double p) {
+  int k;
+  double total = 0.0;
+  for (k = 0; k < n; k++) {
+    total += pow(fabs(u[k] - v[k]), p);
+  }
+  return pow(total, 1.0 / p);
+}
+
+/* Weighted Minkowski distance: (sum_i (|u[i] - v[i]| * w[i])^p)^(1/p). */
+double weighted_minkowski_distance(const double *u, const double *v, int n, double p, const double *w) {
+  int k;
+  double total = 0.0;
+  for (k = 0; k < n; k++) {
+    total += pow(fabs(u[k] - v[k]) * w[k], p);
+  }
+  return pow(total, 1.0 / p);
+}
+
+/**
+ * Column-wise mean of X, read as m rows of n doubles each.
+ * res must have room for n doubles; it is zeroed, accumulated row by row,
+ * then divided by m.
+ */
+void compute_mean_vector(double *res, const double *X, int m, int n) {
+    int row, col;
+    for (col = 0; col < n; col++) {
+        res[col] = 0.0;
+    }
+    for (row = 0; row < m; row++) {
+        const double *current = X + (row * n);
+        for (col = 0; col < n; col++) {
+            res[col] += current[col];
+        }
+    }
+    for (col = 0; col < n; col++) {
+        res[col] /= (double)m;
+    }
+}
+
+/**
+ * Applies euclidean_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_euclidean(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = euclidean_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies mahalanobis_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each; covinv is forwarded unchanged.
+ * The m * (m - 1) / 2 results are stored consecutively in dm, ordered by i
+ * and then by j.
+ * NOTE(review): the malloc() result is used without a NULL check.
+ */
+void pdist_mahalanobis(const double *X, const double *covinv,
+                       double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    /* One allocation provides both n-element buffers handed to
+     * mahalanobis_distance(). */
+    double *scratch = (double*)malloc(sizeof(double) * 2 * n);
+    double *scratch_hi = scratch + n;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = mahalanobis_distance(u, v, covinv, scratch, scratch_hi, n);
+        }
+    }
+    free(scratch);
+}
+
+/**
+ * Applies bray_curtis_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_bray_curtis(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = bray_curtis_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies canberra_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_canberra(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = canberra_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies hamming_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_hamming(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = hamming_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies hamming_distance_bool() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n chars each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_hamming_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = hamming_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies jaccard_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_jaccard(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = jaccard_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies jaccard_distance_bool() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n chars each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_jaccard_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = jaccard_distance_bool(u, v, n);
+        }
+    }
+}
+
+
+/**
+ * Applies chebyshev_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_chebyshev(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = chebyshev_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies cosine_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. norms must hold one value per
+ * row; norms[i] and norms[j] are forwarded for rows i and j. The
+ * m * (m - 1) / 2 results are stored consecutively in dm, ordered by i
+ * and then by j.
+ */
+void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = cosine_distance(u, v, n, norms[row], norms[other]);
+        }
+    }
+}
+
+/**
+ * Applies seuclidean_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each; var is forwarded unchanged for
+ * every pair. The m * (m - 1) / 2 results are stored consecutively in dm,
+ * ordered by i and then by j.
+ */
+void pdist_seuclidean(const double *X, const double *var,
+                      double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = seuclidean_distance(var, u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies city_block_distance() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n doubles each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_city_block(const double *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = city_block_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies minkowski_distance() with order p to every pair of rows (i, j),
+ * i < j, of X. X is read as m rows of n doubles each. The m * (m - 1) / 2
+ * results are stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_minkowski(const double *X, double *dm, int m, int n, double p) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = minkowski_distance(u, v, n, p);
+        }
+    }
+}
+
+/**
+ * Applies weighted_minkowski_distance() with order p and weights w to
+ * every pair of rows (i, j), i < j, of X. X is read as m rows of n doubles
+ * each. The m * (m - 1) / 2 results are stored consecutively in dm,
+ * ordered by i and then by j.
+ */
+void pdist_weighted_minkowski(const double *X, double *dm, int m, int n,
+                              double p, const double *w) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const double *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const double *v = X + (n * other);
+            *out++ = weighted_minkowski_distance(u, v, n, p, w);
+        }
+    }
+}
+
+/**
+ * Applies yule_distance_bool() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n chars each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_yule_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = yule_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies matching_distance_bool() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n chars each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_matching_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = matching_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies dice_distance_bool() to every pair of rows (i, j), i < j, of X.
+ * X is read as m rows of n chars each. The m * (m - 1) / 2 results are
+ * stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_dice_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = dice_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies rogerstanimoto_distance_bool() to every pair of rows (i, j),
+ * i < j, of X. X is read as m rows of n chars each. The m * (m - 1) / 2
+ * results are stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = rogerstanimoto_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies russellrao_distance_bool() to every pair of rows (i, j), i < j,
+ * of X. X is read as m rows of n chars each. The m * (m - 1) / 2 results
+ * are stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_russellrao_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = russellrao_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies kulsinski_distance_bool() to every pair of rows (i, j), i < j,
+ * of X. X is read as m rows of n chars each. The m * (m - 1) / 2 results
+ * are stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_kulsinski_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = kulsinski_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies sokalsneath_distance_bool() to every pair of rows (i, j), i < j,
+ * of X. X is read as m rows of n chars each. The m * (m - 1) / 2 results
+ * are stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = sokalsneath_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies sokalmichener_distance_bool() to every pair of rows (i, j),
+ * i < j, of X. X is read as m rows of n chars each. The m * (m - 1) / 2
+ * results are stored consecutively in dm, ordered by i and then by j.
+ */
+void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n) {
+    int row, other;
+    double *out = dm;
+    for (row = 0; row < m; row++) {
+        const char *u = X + (n * row);
+        for (other = row + 1; other < m; other++) {
+            const char *v = X + (n * other);
+            *out++ = sokalmichener_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Copies the condensed vector v into the strictly upper triangle of the
+ * n-by-n row-major matrix M, row by row.
+ * The diagonal and the lower triangle of M are not written.
+ */
+void dist_to_squareform_from_vector(double *M, const double *v, int n) {
+    const double *src = v;
+    int row, col;
+    for (row = 0; row < n - 1; row++) {
+        /* First element to the right of the diagonal in this row. */
+        double *dst = M + (row * n) + row + 1;
+        for (col = row + 1; col < n; col++) {
+            *dst++ = *src++;
+        }
+    }
+}
+
+/**
+ * Copies the strictly upper triangle of the n-by-n row-major matrix M
+ * into the condensed vector v, row by row.
+ * v receives n * (n - 1) / 2 values; the diagonal and lower triangle of M
+ * are not read.
+ */
+void dist_to_vector_from_squareform(const double *M, double *v, int n) {
+    double *dst = v;
+    int row, col;
+    for (row = 0; row < n - 1; row++) {
+        /* First element to the right of the diagonal in this row. */
+        const double *src = M + (row * n) + row + 1;
+        for (col = row + 1; col < n; col++) {
+            *dst++ = *src++;
+        }
+    }
+}
+
+
+/** cdist */
+
+/**
+ * Applies euclidean_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_euclidean(const double *XA, const double *XB,
+                     double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = euclidean_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies mahalanobis_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each; covinv is
+ * forwarded unchanged. The mA * mB results are stored consecutively in dm,
+ * with j varying fastest.
+ * NOTE(review): the malloc() result is used without a NULL check.
+ */
+void cdist_mahalanobis(const double *XA, const double *XB,
+                       const double *covinv,
+                       double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    /* One allocation provides both n-element buffers handed to
+     * mahalanobis_distance(). */
+    double *scratch = (double*)malloc(sizeof(double) * 2 * n);
+    double *scratch_hi = scratch + n;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = mahalanobis_distance(u, v, covinv, scratch, scratch_hi, n);
+        }
+    }
+    free(scratch);
+}
+
+/**
+ * Applies bray_curtis_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_bray_curtis(const double *XA, const double *XB,
+                       double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = bray_curtis_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies canberra_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_canberra(const double *XA, const double *XB,
+                    double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = canberra_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies hamming_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_hamming(const double *XA, const double *XB,
+                   double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = hamming_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies hamming_distance_bool() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n chars each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_hamming_bool(const char *XA, const char *XB,
+                        double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = hamming_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies jaccard_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_jaccard(const double *XA, const double *XB,
+                   double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = jaccard_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies jaccard_distance_bool() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n chars each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_jaccard_bool(const char *XA, const char *XB,
+                        double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = jaccard_distance_bool(u, v, n);
+        }
+    }
+}
+
+
+/**
+ * Applies chebyshev_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_chebyshev(const double *XA, const double *XB,
+                     double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = chebyshev_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies cosine_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. normsA and
+ * normsB must hold one value per row of XA and XB; normsA[i] and
+ * normsB[j] are forwarded for each pair. The mA * mB results are stored
+ * consecutively in dm, with j varying fastest.
+ */
+void cdist_cosine(const double *XA, const double *XB,
+                  double *dm, int mA, int mB, int n,
+                  const double *normsA, const double *normsB) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = cosine_distance(u, v, n, normsA[a], normsB[b]);
+        }
+    }
+}
+
+/**
+ * Applies seuclidean_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each; var is
+ * forwarded unchanged for every pair. The mA * mB results are stored
+ * consecutively in dm, with j varying fastest.
+ */
+void cdist_seuclidean(const double *XA, const double *XB,
+                      const double *var,
+                      double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = seuclidean_distance(var, u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies city_block_distance() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n doubles each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_city_block(const double *XA, const double *XB,
+                      double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = city_block_distance(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies minkowski_distance() with order p to every (row i of XA, row j
+ * of XB) pair. XA is read as mA rows and XB as mB rows of n doubles each.
+ * The mA * mB results are stored consecutively in dm, with j varying
+ * fastest.
+ */
+void cdist_minkowski(const double *XA, const double *XB,
+                     double *dm, int mA, int mB, int n, double p) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = minkowski_distance(u, v, n, p);
+        }
+    }
+}
+
+/**
+ * Applies weighted_minkowski_distance() with order p and weights w to
+ * every (row i of XA, row j of XB) pair. XA is read as mA rows and XB as
+ * mB rows of n doubles each. The mA * mB results are stored consecutively
+ * in dm, with j varying fastest.
+ */
+void cdist_weighted_minkowski(const double *XA, const double *XB,
+                              double *dm, int mA, int mB, int n,
+                              double p, const double *w) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const double *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const double *v = XB + (n * b);
+            *out++ = weighted_minkowski_distance(u, v, n, p, w);
+        }
+    }
+}
+
+/**
+ * Applies yule_distance_bool() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n chars each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_yule_bool(const char *XA, const char *XB,
+                     double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = yule_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies matching_distance_bool() to every (row i of XA, row j of XB)
+ * pair. XA is read as mA rows and XB as mB rows of n chars each. The
+ * mA * mB results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_matching_bool(const char *XA, const char *XB,
+                         double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = matching_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies dice_distance_bool() to every (row i of XA, row j of XB) pair.
+ * XA is read as mA rows and XB as mB rows of n chars each. The mA * mB
+ * results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_dice_bool(const char *XA, const char *XB,
+                     double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = dice_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies rogerstanimoto_distance_bool() to every (row i of XA, row j of
+ * XB) pair. XA is read as mA rows and XB as mB rows of n chars each. The
+ * mA * mB results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_rogerstanimoto_bool(const char *XA, const char *XB,
+                               double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = rogerstanimoto_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies russellrao_distance_bool() to every (row i of XA, row j of XB)
+ * pair. XA is read as mA rows and XB as mB rows of n chars each. The
+ * mA * mB results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_russellrao_bool(const char *XA, const char *XB,
+                           double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = russellrao_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies kulsinski_distance_bool() to every (row i of XA, row j of XB)
+ * pair. XA is read as mA rows and XB as mB rows of n chars each. The
+ * mA * mB results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_kulsinski_bool(const char *XA, const char *XB,
+                          double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = kulsinski_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies sokalsneath_distance_bool() to every (row i of XA, row j of XB)
+ * pair. XA is read as mA rows and XB as mB rows of n chars each. The
+ * mA * mB results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_sokalsneath_bool(const char *XA, const char *XB,
+                            double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = sokalsneath_distance_bool(u, v, n);
+        }
+    }
+}
+
+/**
+ * Applies sokalmichener_distance_bool() to every (row i of XA, row j of
+ * XB) pair. XA is read as mA rows and XB as mB rows of n chars each. The
+ * mA * mB results are stored consecutively in dm, with j varying fastest.
+ */
+void cdist_sokalmichener_bool(const char *XA, const char *XB,
+                              double *dm, int mA, int mB, int n) {
+    int a, b;
+    double *out = dm;
+    for (a = 0; a < mA; a++) {
+        const char *u = XA + (n * a);
+        for (b = 0; b < mB; b++) {
+            const char *v = XB + (n * b);
+            *out++ = sokalmichener_distance_bool(u, v, n);
+        }
+    }
+}
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/src/distance.h
--- /dev/null
+++ b/yt/utilities/spatial/src/distance.h
@@ -0,0 +1,116 @@
+/**
+ * distance.h
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Copyright (c) 2007, 2008, Damian Eads. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Include guard for the distance routine prototypes. */
+#ifndef _CPY_DISTANCE_H
+#define _CPY_DISTANCE_H
+
+/*
+ * Conversions between a condensed distance vector v and the strictly
+ * upper triangle of an n-by-n row-major matrix M.
+ */
+void dist_to_squareform_from_vector(double *M, const double *v, int n);
+void dist_to_vector_from_squareform(const double *M, double *v, int n);
+/*
+ * pdist_*: X is read as m rows of n elements (double, or char for the
+ * _bool variants). One distance per row pair (i, j), i < j, is stored
+ * consecutively in dm, ordered by i and then by j.
+ */
+void pdist_euclidean(const double *X, double *dm, int m, int n);
+void pdist_seuclidean(const double *X,
+ const double *var, double *dm, int m, int n);
+void pdist_mahalanobis(const double *X, const double *covinv,
+ double *dm, int m, int n);
+void pdist_bray_curtis(const double *X, double *dm, int m, int n);
+void pdist_canberra(const double *X, double *dm, int m, int n);
+void pdist_hamming(const double *X, double *dm, int m, int n);
+void pdist_hamming_bool(const char *X, double *dm, int m, int n);
+void pdist_city_block(const double *X, double *dm, int m, int n);
+void pdist_cosine(const double *X, double *dm, int m, int n, const double *norms);
+void pdist_chebyshev(const double *X, double *dm, int m, int n);
+void pdist_jaccard(const double *X, double *dm, int m, int n);
+void pdist_jaccard_bool(const char *X, double *dm, int m, int n);
+void pdist_kulsinski_bool(const char *X, double *dm, int m, int n);
+void pdist_minkowski(const double *X, double *dm, int m, int n, double p);
+void pdist_weighted_minkowski(const double *X, double *dm, int m, int n, double p, const double *w);
+void pdist_yule_bool(const char *X, double *dm, int m, int n);
+void pdist_matching_bool(const char *X, double *dm, int m, int n);
+void pdist_dice_bool(const char *X, double *dm, int m, int n);
+void pdist_rogerstanimoto_bool(const char *X, double *dm, int m, int n);
+void pdist_russellrao_bool(const char *X, double *dm, int m, int n);
+void pdist_sokalmichener_bool(const char *X, double *dm, int m, int n);
+void pdist_sokalsneath_bool(const char *X, double *dm, int m, int n);
+
+/*
+ * cdist_*: XA is read as mA rows and XB as mB rows of n elements (double,
+ * or char for the _bool variants). One distance per (row i of XA, row j
+ * of XB) pair is stored consecutively in dm, with j varying fastest.
+ */
+void cdist_euclidean(const double *XA, const double *XB, double *dm, int mA, int mB, int n);
+void cdist_mahalanobis(const double *XA, const double *XB,
+ const double *covinv,
+ double *dm, int mA, int mB, int n);
+void cdist_bray_curtis(const double *XA, const double *XB,
+ double *dm, int mA, int mB, int n);
+void cdist_canberra(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_hamming(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_hamming_bool(const char *XA,
+ const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_jaccard(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_jaccard_bool(const char *XA,
+ const char *XB, double *dm, int mA, int mB, int n);
+void cdist_chebyshev(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n);
+void cdist_cosine(const double *XA,
+ const double *XB, double *dm, int mA, int mB, int n,
+ const double *normsA, const double *normsB);
+void cdist_seuclidean(const double *XA,
+ const double *XB,
+ const double *var,
+ double *dm, int mA, int mB, int n);
+void cdist_city_block(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_minkowski(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n, double p);
+void cdist_weighted_minkowski(const double *XA, const double *XB, double *dm,
+ int mA, int mB, int n, double p, const double *w);
+void cdist_yule_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_matching_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_dice_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_rogerstanimoto_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_russellrao_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_kulsinski_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_sokalsneath_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+void cdist_sokalmichener_bool(const char *XA, const char *XB, double *dm,
+ int mA, int mB, int n);
+
+#endif
diff -r 03a5780627a9fea13488fb83a2a9c6767f30e9f3 -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 yt/utilities/spatial/src/distance_wrap.c
--- /dev/null
+++ b/yt/utilities/spatial/src/distance_wrap.c
@@ -0,0 +1,1163 @@
+/**
+ * distance_wrap.c
+ *
+ * Author: Damian Eads
+ * Date: September 22, 2007 (moved to new file on June 8, 2008)
+ * Adapted for incorporation into Scipy, April 9, 2008.
+ *
+ * Copyright (c) 2007, Damian Eads. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of the author nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include "distance.h"
+#include "Python.h"
+#include <numpy/arrayobject.h>
+#include <stdio.h>
+
+/**
+ * Python-callable wrapper around cdist_euclidean().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_euclidean_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_euclidean((const double*)PyArray_DATA(XA_),
+                    (const double*)PyArray_DATA(XB_),
+                    (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_canberra().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_canberra_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_canberra((const double*)PyArray_DATA(XA_),
+                   (const double*)PyArray_DATA(XB_),
+                   (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_bray_curtis().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_bray_curtis_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_bray_curtis((const double*)PyArray_DATA(XA_),
+                      (const double*)PyArray_DATA(XB_),
+                      (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+
+/**
+ * Python-callable wrapper around cdist_mahalanobis().
+ * Arguments (all numpy arrays, in this order): XA, XB, covinv, dm. Row
+ * counts come from the first dimension of XA and XB, the row length from
+ * the second dimension of XA; dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_mahalanobis_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *covinv_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &covinv_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_mahalanobis((const double*)PyArray_DATA(XA_),
+                      (const double*)PyArray_DATA(XB_),
+                      (const double*)PyArray_DATA(covinv_),
+                      (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+
+/**
+ * Python-callable wrapper around cdist_chebyshev().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_chebyshev_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_chebyshev((const double*)PyArray_DATA(XA_),
+                    (const double*)PyArray_DATA(XB_),
+                    (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+
+/**
+ * Python-callable wrapper around cdist_cosine().
+ * Arguments (all numpy arrays, in this order): XA, XB, dm, normsA, normsB.
+ * Row counts come from the first dimension of XA and XB, the row length
+ * from the second dimension of XA; dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_cosine_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_, *normsA_, *normsB_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_,
+                          &PyArray_Type, &normsA_,
+                          &PyArray_Type, &normsB_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_cosine((const double*)PyArray_DATA(XA_),
+                 (const double*)PyArray_DATA(XB_),
+                 (double*)PyArray_DATA(dm_), mA, mB, n,
+                 (const double*)PyArray_DATA(normsA_),
+                 (const double*)PyArray_DATA(normsB_));
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_seuclidean().
+ * Arguments (all numpy arrays, in this order): XA, XB, var, dm. Row counts
+ * come from the first dimension of XA and XB, the row length from the
+ * second dimension of XA; dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_seuclidean_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_, *var_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &var_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_seuclidean((const double*)PyArray_DATA(XA_),
+                     (const double*)PyArray_DATA(XB_),
+                     (const double*)PyArray_DATA(var_),
+                     (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_city_block().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_city_block_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_city_block((const double*)PyArray_DATA(XA_),
+                     (const double*)PyArray_DATA(XB_),
+                     (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_hamming().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_hamming_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_hamming((const double*)PyArray_DATA(XA_),
+                  (const double*)PyArray_DATA(XB_),
+                  (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_hamming_bool().
+ * Arguments (all numpy arrays): XA, XB, dm; the XA and XB buffers are read
+ * as chars. Row counts come from the first dimension of XA and XB, the
+ * row length from the second dimension of XA; dm's buffer is written in
+ * place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_hamming_bool_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_hamming_bool((const char*)PyArray_DATA(XA_),
+                       (const char*)PyArray_DATA(XB_),
+                       (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_jaccard().
+ * Arguments (all numpy arrays): XA, XB, dm. Row counts come from the first
+ * dimension of XA and XB, the row length from the second dimension of XA;
+ * dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_jaccard_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_jaccard((const double*)PyArray_DATA(XA_),
+                  (const double*)PyArray_DATA(XB_),
+                  (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_jaccard_bool().
+ * Arguments (all numpy arrays): XA, XB, dm; the XA and XB buffers are read
+ * as chars. Row counts come from the first dimension of XA and XB, the
+ * row length from the second dimension of XA; dm's buffer is written in
+ * place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_jaccard_bool_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_jaccard_bool((const char*)PyArray_DATA(XA_),
+                       (const char*)PyArray_DATA(XB_),
+                       (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_minkowski().
+ * Arguments: numpy arrays XA, XB, dm followed by the float order p. Row
+ * counts come from the first dimension of XA and XB, the row length from
+ * the second dimension of XA; dm's buffer is written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_minkowski_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    double p;
+    if (!PyArg_ParseTuple(args, "O!O!O!d",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_,
+                          &p)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_minkowski((const double*)PyArray_DATA(XA_),
+                    (const double*)PyArray_DATA(XB_),
+                    (double*)PyArray_DATA(dm_), mA, mB, n, p);
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_weighted_minkowski().
+ * Arguments, in this order: numpy arrays XA, XB, dm, the float order p,
+ * then the numpy array w. Row counts come from the first dimension of XA
+ * and XB, the row length from the second dimension of XA; dm's buffer is
+ * written in place.
+ * Returns a Python float 0.0, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_weighted_minkowski_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_, *w_;
+    int mA, mB, n;
+    double p;
+    if (!PyArg_ParseTuple(args, "O!O!O!dO!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_,
+                          &p,
+                          &PyArray_Type, &w_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_weighted_minkowski((const double*)PyArray_DATA(XA_),
+                             (const double*)PyArray_DATA(XB_),
+                             (double*)PyArray_DATA(dm_), mA, mB, n, p,
+                             (const double*)PyArray_DATA(w_));
+    return Py_BuildValue("d", 0.0);
+}
+
+/**
+ * Python-callable wrapper around cdist_yule_bool().
+ * Arguments (all numpy arrays): XA, XB, dm; the XA and XB buffers are read
+ * as chars. Row counts come from the first dimension of XA and XB, the
+ * row length from the second dimension of XA; dm's buffer is written in
+ * place.
+ * Returns Python None, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_yule_bool_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_yule_bool((const char*)PyArray_DATA(XA_),
+                    (const char*)PyArray_DATA(XB_),
+                    (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("");
+}
+
+/**
+ * Python-callable wrapper around cdist_matching_bool().
+ * Arguments (all numpy arrays): XA, XB, dm; the XA and XB buffers are read
+ * as chars. Row counts come from the first dimension of XA and XB, the
+ * row length from the second dimension of XA; dm's buffer is written in
+ * place.
+ * Returns Python None, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_matching_bool_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_matching_bool((const char*)PyArray_DATA(XA_),
+                        (const char*)PyArray_DATA(XB_),
+                        (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("");
+}
+
+/**
+ * Python-callable wrapper around cdist_dice_bool().
+ * Arguments (all numpy arrays): XA, XB, dm; the XA and XB buffers are read
+ * as chars. Row counts come from the first dimension of XA and XB, the
+ * row length from the second dimension of XA; dm's buffer is written in
+ * place.
+ * Returns Python None, or NULL when argument parsing fails.
+ * NOTE(review): dtype, rank and contiguity are not validated here --
+ * presumably the Python caller ensures them; confirm.
+ */
+extern PyObject *cdist_dice_bool_wrap(PyObject *self, PyObject *args) {
+    PyArrayObject *XA_, *XB_, *dm_;
+    int mA, mB, n;
+    if (!PyArg_ParseTuple(args, "O!O!O!",
+                          &PyArray_Type, &XA_,
+                          &PyArray_Type, &XB_,
+                          &PyArray_Type, &dm_)) {
+        return NULL;
+    }
+    mA = (int)PyArray_DIM(XA_, 0);
+    mB = (int)PyArray_DIM(XB_, 0);
+    n = (int)PyArray_DIM(XA_, 1);
+    cdist_dice_bool((const char*)PyArray_DATA(XA_),
+                    (const char*)PyArray_DATA(XB_),
+                    (double*)PyArray_DATA(dm_), mA, mB, n);
+    return Py_BuildValue("");
+}
+
+extern PyObject *cdist_rogerstanimoto_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n;
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+
+ cdist_rogerstanimoto_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *cdist_russellrao_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n;
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+
+ cdist_russellrao_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *cdist_kulsinski_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n;
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+
+ cdist_kulsinski_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *cdist_sokalmichener_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n;
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+
+ cdist_sokalmichener_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *cdist_sokalsneath_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *XA_, *XB_, *dm_;
+ int mA, mB, n;
+ double *dm;
+ const char *XA, *XB;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &XA_, &PyArray_Type, &XB_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ XA = (const char*)XA_->data;
+ XB = (const char*)XB_->data;
+ dm = (double*)dm_->data;
+ mA = XA_->dimensions[0];
+ mB = XB_->dimensions[0];
+ n = XA_->dimensions[1];
+
+ cdist_sokalsneath_bool(XA, XB, dm, mA, mB, n);
+ }
+ return Py_BuildValue("");
+}
+
+/***************************** pdist ***/
+
+extern PyObject *pdist_euclidean_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_euclidean(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_canberra_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_canberra(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_bray_curtis_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_bray_curtis(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_mahalanobis_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *covinv_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ const double *covinv;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &covinv_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ covinv = (const double*)covinv_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_mahalanobis(X, covinv, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_chebyshev_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_chebyshev(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_cosine_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_, *norms_;
+ int m, n;
+ double *dm;
+ const double *X, *norms;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &PyArray_Type, &norms_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ norms = (const double*)norms_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_cosine(X, dm, m, n, norms);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_seuclidean_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_, *var_;
+ int m, n;
+ double *dm;
+ const double *X, *var;
+ if (!PyArg_ParseTuple(args, "O!O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &var_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ var = (double*)var_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_seuclidean(X, var, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_city_block_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_city_block(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_hamming_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_hamming(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_hamming_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_hamming_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_jaccard_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const double *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_jaccard(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_jaccard_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_jaccard_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_minkowski_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm, *X;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!d",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &p)) {
+ return 0;
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_minkowski(X, dm, m, n, p);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *pdist_weighted_minkowski_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_, *w_;
+ int m, n;
+ double *dm, *X, *w;
+ double p;
+ if (!PyArg_ParseTuple(args, "O!O!dO!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_,
+ &p,
+ &PyArray_Type, &w_)) {
+ return 0;
+ }
+ else {
+ X = (double*)X_->data;
+ dm = (double*)dm_->data;
+ w = (const double*)w_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_weighted_minkowski(X, dm, m, n, p, w);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+extern PyObject *pdist_yule_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_yule_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_matching_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_matching_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_dice_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_dice_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_rogerstanimoto_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_rogerstanimoto_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_russellrao_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_russellrao_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_kulsinski_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_kulsinski_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_sokalmichener_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_sokalmichener_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *pdist_sokalsneath_bool_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *X_, *dm_;
+ int m, n;
+ double *dm;
+ const char *X;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &X_,
+ &PyArray_Type, &dm_)) {
+ return 0;
+ }
+ else {
+ X = (const char*)X_->data;
+ dm = (double*)dm_->data;
+ m = X_->dimensions[0];
+ n = X_->dimensions[1];
+
+ pdist_sokalsneath_bool(X, dm, m, n);
+ }
+ return Py_BuildValue("");
+}
+
+extern PyObject *to_squareform_from_vector_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *M_, *v_;
+ int n;
+ const double *v;
+ double *M;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &M_,
+ &PyArray_Type, &v_)) {
+ return 0;
+ }
+ else {
+ M = (double*)M_->data;
+ v = (const double*)v_->data;
+ n = M_->dimensions[0];
+ dist_to_squareform_from_vector(M, v, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+extern PyObject *to_vector_from_squareform_wrap(PyObject *self, PyObject *args) {
+ PyArrayObject *M_, *v_;
+ int n;
+ double *v;
+ const double *M;
+ if (!PyArg_ParseTuple(args, "O!O!",
+ &PyArray_Type, &M_,
+ &PyArray_Type, &v_)) {
+ return 0;
+ }
+ else {
+ M = (const double*)M_->data;
+ v = (double*)v_->data;
+ n = M_->dimensions[0];
+ dist_to_vector_from_squareform(M, v, n);
+ }
+ return Py_BuildValue("d", 0.0);
+}
+
+
+static PyMethodDef _distanceWrapMethods[] = {
+ {"cdist_bray_curtis_wrap", cdist_bray_curtis_wrap, METH_VARARGS},
+ {"cdist_canberra_wrap", cdist_canberra_wrap, METH_VARARGS},
+ {"cdist_chebyshev_wrap", cdist_chebyshev_wrap, METH_VARARGS},
+ {"cdist_city_block_wrap", cdist_city_block_wrap, METH_VARARGS},
+ {"cdist_cosine_wrap", cdist_cosine_wrap, METH_VARARGS},
+ {"cdist_dice_bool_wrap", cdist_dice_bool_wrap, METH_VARARGS},
+ {"cdist_euclidean_wrap", cdist_euclidean_wrap, METH_VARARGS},
+ {"cdist_hamming_wrap", cdist_hamming_wrap, METH_VARARGS},
+ {"cdist_hamming_bool_wrap", cdist_hamming_bool_wrap, METH_VARARGS},
+ {"cdist_jaccard_wrap", cdist_jaccard_wrap, METH_VARARGS},
+ {"cdist_jaccard_bool_wrap", cdist_jaccard_bool_wrap, METH_VARARGS},
+ {"cdist_kulsinski_bool_wrap", cdist_kulsinski_bool_wrap, METH_VARARGS},
+ {"cdist_mahalanobis_wrap", cdist_mahalanobis_wrap, METH_VARARGS},
+ {"cdist_matching_bool_wrap", cdist_matching_bool_wrap, METH_VARARGS},
+ {"cdist_minkowski_wrap", cdist_minkowski_wrap, METH_VARARGS},
+ {"cdist_weighted_minkowski_wrap", cdist_weighted_minkowski_wrap, METH_VARARGS},
+ {"cdist_rogerstanimoto_bool_wrap", cdist_rogerstanimoto_bool_wrap, METH_VARARGS},
+ {"cdist_russellrao_bool_wrap", cdist_russellrao_bool_wrap, METH_VARARGS},
+ {"cdist_seuclidean_wrap", cdist_seuclidean_wrap, METH_VARARGS},
+ {"cdist_sokalmichener_bool_wrap", cdist_sokalmichener_bool_wrap, METH_VARARGS},
+ {"cdist_sokalsneath_bool_wrap", cdist_sokalsneath_bool_wrap, METH_VARARGS},
+ {"cdist_yule_bool_wrap", cdist_yule_bool_wrap, METH_VARARGS},
+ {"pdist_bray_curtis_wrap", pdist_bray_curtis_wrap, METH_VARARGS},
+ {"pdist_canberra_wrap", pdist_canberra_wrap, METH_VARARGS},
+ {"pdist_chebyshev_wrap", pdist_chebyshev_wrap, METH_VARARGS},
+ {"pdist_city_block_wrap", pdist_city_block_wrap, METH_VARARGS},
+ {"pdist_cosine_wrap", pdist_cosine_wrap, METH_VARARGS},
+ {"pdist_dice_bool_wrap", pdist_dice_bool_wrap, METH_VARARGS},
+ {"pdist_euclidean_wrap", pdist_euclidean_wrap, METH_VARARGS},
+ {"pdist_hamming_wrap", pdist_hamming_wrap, METH_VARARGS},
+ {"pdist_hamming_bool_wrap", pdist_hamming_bool_wrap, METH_VARARGS},
+ {"pdist_jaccard_wrap", pdist_jaccard_wrap, METH_VARARGS},
+ {"pdist_jaccard_bool_wrap", pdist_jaccard_bool_wrap, METH_VARARGS},
+ {"pdist_kulsinski_bool_wrap", pdist_kulsinski_bool_wrap, METH_VARARGS},
+ {"pdist_mahalanobis_wrap", pdist_mahalanobis_wrap, METH_VARARGS},
+ {"pdist_matching_bool_wrap", pdist_matching_bool_wrap, METH_VARARGS},
+ {"pdist_minkowski_wrap", pdist_minkowski_wrap, METH_VARARGS},
+ {"pdist_weighted_minkowski_wrap", pdist_weighted_minkowski_wrap, METH_VARARGS},
+ {"pdist_rogerstanimoto_bool_wrap", pdist_rogerstanimoto_bool_wrap, METH_VARARGS},
+ {"pdist_russellrao_bool_wrap", pdist_russellrao_bool_wrap, METH_VARARGS},
+ {"pdist_seuclidean_wrap", pdist_seuclidean_wrap, METH_VARARGS},
+ {"pdist_sokalmichener_bool_wrap", pdist_sokalmichener_bool_wrap, METH_VARARGS},
+ {"pdist_sokalsneath_bool_wrap", pdist_sokalsneath_bool_wrap, METH_VARARGS},
+ {"pdist_yule_bool_wrap", pdist_yule_bool_wrap, METH_VARARGS},
+ {"to_squareform_from_vector_wrap",
+ to_squareform_from_vector_wrap, METH_VARARGS},
+ {"to_vector_from_squareform_wrap",
+ to_vector_from_squareform_wrap, METH_VARARGS},
+ {NULL, NULL} /* Sentinel - marks the end of this structure */
+};
+
+#if PY_VERSION_HEX >= 0x03000000
+static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "_distance_wrap",
+ NULL,
+ -1,
+ _distanceWrapMethods,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
+PyObject *PyInit__distance_wrap(void)
+{
+ PyObject *m;
+
+ m = PyModule_Create(&moduledef);
+ import_array();
+
+ return m;
+}
+#else
+PyMODINIT_FUNC init_distance_wrap(void)
+{
+ (void) Py_InitModule("_distance_wrap", _distanceWrapMethods);
+ import_array(); // Must be present for NumPy. Called first after above line.
+}
+#endif
https://bitbucket.org/yt_analysis/yt/changeset/5bf2080e06f6/
changeset: 5bf2080e06f6
branch: yt
user: sskory
date: 2011-11-02 20:30:26
summary: Force regions that have been touched by the boolean stuff to
make correct cut_masks and not pull the wrong ones.
affected #: 1 file
diff -r 04d0a3952d814ecc1d125ad29cbaf4f2efd17cc9 -r 5bf2080e06f63a6b8b5e4c0baa4b3c2b52c16e3b yt/data_objects/data_containers.py
--- a/yt/data_objects/data_containers.py
+++ b/yt/data_objects/data_containers.py
@@ -96,7 +96,8 @@
def check_cache(self, grid):
if isinstance(grid, FakeGridForParticles):
return func(self, grid)
- elif grid.id not in self._cut_masks:
+ elif grid.id not in self._cut_masks or \
+ hasattr(self, "_boolean_touched"):
cm = func(self, grid)
self._cut_masks[grid.id] = cm
return self._cut_masks[grid.id]
@@ -3460,6 +3461,8 @@
for item in self.regions:
if isinstance(item, types.StringType): continue
self._all_regions.append(item)
+ # So cut_masks don't get messed up.
+ item._boolean_touched = True
self._all_regions = na.unique(self._all_regions)
def _make_overlaps(self):
@@ -3475,8 +3478,13 @@
overall = self._get_cut_mask(grid)
local = force_array(region._get_cut_mask(grid),
grid.ActiveDimensions)
- if (local == na.bitwise_and(overall, local)).all():
- # All of local is in overall
+ # Below we don't want to match empty masks.
+ if overall.sum() == 0 and local.sum() == 0: continue
+ # The whole grid is in the hybrid region if a) its cut_mask
+ # in the original region is identical to the new one and b)
+ # the original region cut_mask is all ones.
+ if (local == na.bitwise_and(overall, local)).all() and \
+ (local == True).all():
self._all_overlap.append(grid)
continue
if (overall == local).any():
@@ -3499,7 +3507,7 @@
return self._cut_masks[grid.id]
# If we get this far, we have to generate the cut_mask.
return self._get_level_mask(self.regions, grid)
-
+
def _get_level_mask(self, ops, grid):
level_masks = []
end = 0
@@ -3546,6 +3554,8 @@
this_cut_mask)
if item == "OR":
na.bitwise_or(this_cut_mask, level_masks[i+1], this_cut_mask)
+ if not isinstance(grid, FakeGridForParticles):
+ self._cut_masks[grid.id] = this_cut_mask
return this_cut_mask
https://bitbucket.org/yt_analysis/yt/changeset/edda8018cea4/
changeset: edda8018cea4
branch: yt
user: sskory
date: 2011-11-02 20:46:47
summary: Particles in boolean regions appear to work.
affected #: 1 file
diff -r 5bf2080e06f63a6b8b5e4c0baa4b3c2b52c16e3b -r edda8018cea47224ff2155702bf5dc997d906a85 yt/data_objects/data_containers.py
--- a/yt/data_objects/data_containers.py
+++ b/yt/data_objects/data_containers.py
@@ -2334,7 +2334,8 @@
if force_particle_read == False and \
self.pf.field_info.has_key(field) and \
self.pf.field_info[field].particle_type and \
- self.pf.h.io._particle_reader:
+ self.pf.h.io._particle_reader and \
+ not isinstance(self, AMRBooleanRegionBase):
self.particles.get_data(field)
if field not in self.field_data:
if self._generate_field(field): continue
https://bitbucket.org/yt_analysis/yt/changeset/1562ec7010e0/
changeset: 1562ec7010e0
branch: yt
user: sskory
date: 2011-11-02 22:29:40
summary: Adding answer testing for the boolean regions.
affected #: 2 files
diff -r edda8018cea47224ff2155702bf5dc997d906a85 -r 1562ec7010e0051d012d4203ea88048ef68092e5 tests/boolean_regions.py
--- /dev/null
+++ b/tests/boolean_regions.py
@@ -0,0 +1,18 @@
+from yt.utilities.answer_testing.output_tests import \
+ SingleOutputTest, create_test
+from yt.utilities.answer_testing.boolean_region_tests import \
+ TestBooleanANDGridQuantity, TestBooleanORGridQuantity, \
+ TestBooleanNOTGridQuantity, TestBooleanANDParticleQuantity, \
+ TestBooleanORParticleQuantity, TestBooleanNOTParticleQuantity
+
+create_test(TestBooleanANDGridQuantity, "BooleanANDGrid")
+
+create_test(TestBooleanORGridQuantity, "BooleanORGrid")
+
+create_test(TestBooleanNOTGridQuantity, "BooleanNOTGrid")
+
+create_test(TestBooleanANDParticleQuantity, "BooleanANDParticle")
+
+create_test(TestBooleanORParticleQuantity, "BooleanORParticle")
+
+create_test(TestBooleanNOTParticleQuantity, "BooleanNOTParticle")
\ No newline at end of file
diff -r edda8018cea47224ff2155702bf5dc997d906a85 -r 1562ec7010e0051d012d4203ea88048ef68092e5 yt/utilities/answer_testing/boolean_region_tests.py
--- /dev/null
+++ b/yt/utilities/answer_testing/boolean_region_tests.py
@@ -0,0 +1,165 @@
+from yt.mods import *
+import matplotlib
+import pylab
+from output_tests import SingleOutputTest, YTStaticOutputTest, create_test
+import hashlib
+import numpy as np
+
+# Tests to make sure that grid quantities are identical that should
+# be identical for the AND operator.
+class TestBooleanANDGridQuantity(YTStaticOutputTest);
+ def run(self):
+ domain = self.pf.domain_right_edge - self.pf.domain_left_edge
+ four = 0.4 * domain + self.pf.domain_left_edge
+ five = 0.5 * domain + self.pf.domain_left_edge
+ six = 0.6 * domain + self.pf.domain_left_edge
+ re1 = self.pf.h.region_strict(five, four, six)
+ re2 = self.pf.h.region_strict(five, five, six)
+ re = self.pf.h.boolean([re1, "AND", re2])
+ # re should look like re2.
+ x2 = re2['x']
+ x = re['x']
+ x2 = x2[x2.argsort()]
+ x = x[x.argsort()]
+ self.result = (x2, x)
+
+ def compare(self, old_result):
+ self.compare_array_delta(self.result[0], self.result[1])
+
+ def plot(self):
+ return []
+
+# OR
+class TestBooleanORGridQuantity(YTStaticOutputTest);
+ def run(self):
+ domain = self.pf.domain_right_edge - self.pf.domain_left_edge
+ four = 0.4 * domain + self.pf.domain_left_edge
+ five = 0.5 * domain + self.pf.domain_left_edge
+ six = 0.6 * domain + self.pf.domain_left_edge
+ re1 = self.pf.h.region_strict(five, four, six)
+ re2 = self.pf.h.region_strict(five, five, six)
+ re = self.pf.h.boolean([re1, "OR", re2])
+ # re should look like re1
+ x1 = re1['x']
+ x = re['x']
+ x1 = x1[x1.argsort()]
+ x = x[x.argsort()]
+ self.result = (x1, x)
+
+ def compare(self, old_result):
+ self.compare_array_delta(self.result[0], self.result[1])
+
+ def plot(self):
+ return []
+
+# NOT
+class TestBooleanNOTGridQuantity(YTStaticOutputTest);
+ def run(self):
+ domain = self.pf.domain_right_edge - self.pf.domain_left_edge
+ four = 0.4 * domain + self.pf.domain_left_edge
+ five = 0.5 * domain + self.pf.domain_left_edge
+ six = 0.6 * domain + self.pf.domain_left_edge
+ re1 = self.pf.h.region_strict(five, four, six)
+ re2 = self.pf.h.region_strict(five, five, six)
+ # Bottom base
+ re3 = self.pf.h.region_strict(five, four, [six[0], six[1], five[2]])
+ # Side
+ re4 = self.pf.h.region_strict(five, [four[0], four[1], five[2]],
+ [five[0], six[1], six[2]])
+ # Last small cube
+ re5 = self.pf.h.region_strict(five, [five[0], four[0], four[2]],
+ [six[0], five[1], six[2]])
+ # re1 NOT re2 should look like re3 OR re4 OR re5
+ re = self.pf.h.boolean([re1, "NOT", re2])
+ reo = self.pf.h.boolean([re3, "OR", re4, "OR", re5])
+ x = re['x']
+ xo = reo['x']
+ x = x[x.argsort()]
+ xo = xo[xo.argsort()]
+ self.result = (x, xo)
+
+ def compare(self, old_result):
+ self.compare_array_delta(self.result[0], self.result[1])
+
+ def plot(self):
+ return []
+
+# Tests to make sure that particle quantities are identical that should
+# be identical for the AND operator.
+class TestBooleanANDParticleQuantity(YTStaticOutputTest);
+ def run(self):
+ domain = self.pf.domain_right_edge - self.pf.domain_left_edge
+ four = 0.4 * domain + self.pf.domain_left_edge
+ five = 0.5 * domain + self.pf.domain_left_edge
+ six = 0.6 * domain + self.pf.domain_left_edge
+ re1 = self.pf.h.region_strict(five, four, six)
+ re2 = self.pf.h.region_strict(five, five, six)
+ re = self.pf.h.boolean([re1, "AND", re2])
+ # re should look like re2.
+ x2 = re2['particle_position_x']
+ x = re['particle_position_x']
+ x2 = x2[x2.argsort()]
+ x = x[x.argsort()]
+ self.result = (x2, x)
+
+ def compare(self, old_result):
+ self.compare_array_delta(self.result[0], self.result[1])
+
+ def plot(self):
+ return []
+
+# OR
+class TestBooleanORParticleQuantity(YTStaticOutputTest);
+ def run(self):
+ domain = self.pf.domain_right_edge - self.pf.domain_left_edge
+ four = 0.4 * domain + self.pf.domain_left_edge
+ five = 0.5 * domain + self.pf.domain_left_edge
+ six = 0.6 * domain + self.pf.domain_left_edge
+ re1 = self.pf.h.region_strict(five, four, six)
+ re2 = self.pf.h.region_strict(five, five, six)
+ re = self.pf.h.boolean([re1, "OR", re2])
+ # re should look like re1
+ x1 = re1['particle_position_x']
+ x = re['particle_position_x']
+ x1 = x1[x1.argsort()]
+ x = x[x.argsort()]
+ self.result = (x1, x)
+
+ def compare(self, old_result):
+ self.compare_array_delta(self.result[0], self.result[1])
+
+ def plot(self):
+ return []
+
+# NOT
+class TestBooleanNOTParticleQuantity(YTStaticOutputTest);
+ def run(self):
+ domain = self.pf.domain_right_edge - self.pf.domain_left_edge
+ four = 0.4 * domain + self.pf.domain_left_edge
+ five = 0.5 * domain + self.pf.domain_left_edge
+ six = 0.6 * domain + self.pf.domain_left_edge
+ re1 = self.pf.h.region_strict(five, four, six)
+ re2 = self.pf.h.region_strict(five, five, six)
+ # Bottom base
+ re3 = self.pf.h.region_strict(five, four, [six[0], six[1], five[2]])
+ # Side
+ re4 = self.pf.h.region_strict(five, [four[0], four[1], five[2]],
+ [five[0], six[1], six[2]])
+ # Last small cube
+ re5 = self.pf.h.region_strict(five, [five[0], four[0], four[2]],
+ [six[0], five[1], six[2]])
+ # re1 NOT re2 should look like re3 OR re4 OR re5
+ re = self.pf.h.boolean([re1, "NOT", re2])
+ reo = self.pf.h.boolean([re3, "OR", re4, "OR", re5])
+ x = re['particle_position_x']
+ xo = reo['particle_position_x']
+ x = x[x.argsort()]
+ xo = xo[xo.argsort()]
+ self.result = (x, xo)
+
+ def compare(self, old_result):
+ self.compare_array_delta(self.result[0], self.result[1])
+
+ def plot(self):
+ return []
+
https://bitbucket.org/yt_analysis/yt/changeset/3506f59b4060/
changeset: 3506f59b4060
branch: yt
user: sskory
date: 2011-11-02 22:30:36
summary: Merge.
affected #: 3 files
diff -r 1562ec7010e0051d012d4203ea88048ef68092e5 -r 3506f59b406044622636dee01f09cd9f9b864975 doc/install_script.sh
--- a/doc/install_script.sh
+++ b/doc/install_script.sh
@@ -304,7 +304,7 @@
get_enzotools Python-2.7.2.tgz
get_enzotools numpy-1.6.1.tar.gz
get_enzotools matplotlib-1.1.0.tar.gz
-get_enzotools mercurial-1.8.1.tar.gz
+get_enzotools mercurial-2.0.tar.gz
get_enzotools ipython-0.10.tar.gz
get_enzotools h5py-2.0.1.tar.gz
get_enzotools Cython-0.15.1.tar.gz
@@ -442,7 +442,7 @@
if [ $INST_HG -eq 1 ]
then
echo "Installing Mercurial."
- do_setup_py mercurial-1.8.1
+ do_setup_py mercurial-2.0
export HG_EXEC=${DEST_DIR}/bin/hg
else
# We assume that hg can be found in the path.
@@ -553,6 +553,7 @@
then
( unzip -o ext-3.3.2.zip 2>&1 ) 1>> ${LOG_FILE} || do_exit
( echo "Symlinking ext-3.3.2 as ext-resources" 2>&1 ) 1>> ${LOG_FILE}
+ rm -rf ext-resources
ln -sf ext-3.3.2 ext-resources
touch ext-3.3.2/done
fi
@@ -562,6 +563,7 @@
then
( unzip -o ext-slate-110328.zip 2>&1 ) 1>> ${LOG_FILE} || do_exit
( echo "Symlinking ext-slate-110328 as ext-theme" 2>&1 ) 1>> ${LOG_FILE}
+ rm -rf ext-theme
ln -sf ext-slate-110328 ext-theme
touch ext-slate-110328/done
fi
@@ -571,6 +573,7 @@
then
( unzip -o PhiloGL-1.4.2.zip 2>&1 ) 1>> ${LOG_FILE} || do_exit
( echo "Symlinking PhiloGL-1.4.2 as PhiloGL" 2>&1 ) 1>> ${LOG_FILE}
+ rm -rf PhiloGL
ln -sf PhiloGL-1.4.2 PhiloGL
touch PhiloGL-1.4.2/done
fi
diff -r 1562ec7010e0051d012d4203ea88048ef68092e5 -r 3506f59b406044622636dee01f09cd9f9b864975 yt/utilities/amr_kdtree/amr_kdtree.py
--- a/yt/utilities/amr_kdtree/amr_kdtree.py
+++ b/yt/utilities/amr_kdtree/amr_kdtree.py
@@ -1236,7 +1236,7 @@
except:
pass
f.close()
- if self.comm.rank != (nprocs-1):
+ if self.comm.rank != (self.comm.size-1):
self.comm.send_array([0],self.comm.rank+1, tag=self.comm.rank)
def load_kd_bricks(self,fn=None):
@@ -1266,7 +1266,7 @@
f.close()
except:
pass
- if self.comm.rank != (nprocs-1):
+ if self.comm.rank != (self.comm.size-1):
self.comm.send_array([0],self.comm.rank+1, tag=self.comm.rank)
def load_tree(self,fn):
diff -r 1562ec7010e0051d012d4203ea88048ef68092e5 -r 3506f59b406044622636dee01f09cd9f9b864975 yt/utilities/answer_testing/output_tests.py
--- a/yt/utilities/answer_testing/output_tests.py
+++ b/yt/utilities/answer_testing/output_tests.py
@@ -55,8 +55,11 @@
class ArrayDelta(ValueDelta):
def __repr__(self):
- return "ArrayDelta: Delta %s, max of %s" % (
- self.delta, self.acceptable)
+ nabove = len(na.where(self.delta > self.acceptable)[0])
+ return "ArrayDelta: Delta %s, max of %s, acceptable of %s.\n" \
+ "%d of %d points above the acceptable limit" % \
+ (self.delta, self.delta.max(), self.acceptable, nabove,
+ self.delta.size)
class ShapeMismatch(RegressionTestException):
def __init__(self, old_shape, current_shape):
https://bitbucket.org/yt_analysis/yt/changeset/d30bc9987ab8/
changeset: d30bc9987ab8
branch: yt
user: sskory
date: 2011-11-02 22:40:49
summary: Forgot to add the boolean region tests to here.
affected #: 1 file
diff -r 3506f59b406044622636dee01f09cd9f9b864975 -r d30bc9987ab8fd2acb457eed5f3a0bb4253c9f02 yt/utilities/answer_testing/api.py
--- a/yt/utilities/answer_testing/api.py
+++ b/yt/utilities/answer_testing/api.py
@@ -50,3 +50,12 @@
TestHaloCompositionHashHOP, \
TestHaloCompositionHashFOF, \
TestHaloCompositionHashPHOP
+
+from .boolean_region_test import \
+ TestBooleanANDGridQuantity, \
+ TestBooleanORGridQuantity, \
+ TestBooleanNOTGridQuantity, \
+ TestBooleanANDParticleQuantity, \
+ TestBooleanORParticleQuantity, \
+ TestBooleanNOTParticleQuantity
+
https://bitbucket.org/yt_analysis/yt/changeset/5358c43b84b8/
changeset: 5358c43b84b8
branch: yt
user: sskory
date: 2011-11-02 22:52:40
summary: Ooops. ; != :.
affected #: 1 file
diff -r d30bc9987ab8fd2acb457eed5f3a0bb4253c9f02 -r 5358c43b84b8854d07b08004d5059d4a4e492201 yt/utilities/answer_testing/boolean_region_tests.py
--- a/yt/utilities/answer_testing/boolean_region_tests.py
+++ b/yt/utilities/answer_testing/boolean_region_tests.py
@@ -7,7 +7,7 @@
# Tests to make sure that grid quantities are identical that should
# be identical for the AND operator.
-class TestBooleanANDGridQuantity(YTStaticOutputTest);
+class TestBooleanANDGridQuantity(YTStaticOutputTest):
def run(self):
domain = self.pf.domain_right_edge - self.pf.domain_left_edge
four = 0.4 * domain + self.pf.domain_left_edge
@@ -30,7 +30,7 @@
return []
# OR
-class TestBooleanORGridQuantity(YTStaticOutputTest);
+class TestBooleanORGridQuantity(YTStaticOutputTest):
def run(self):
domain = self.pf.domain_right_edge - self.pf.domain_left_edge
four = 0.4 * domain + self.pf.domain_left_edge
@@ -53,7 +53,7 @@
return []
# NOT
-class TestBooleanNOTGridQuantity(YTStaticOutputTest);
+class TestBooleanNOTGridQuantity(YTStaticOutputTest):
def run(self):
domain = self.pf.domain_right_edge - self.pf.domain_left_edge
four = 0.4 * domain + self.pf.domain_left_edge
@@ -86,7 +86,7 @@
# Tests to make sure that particle quantities are identical that should
# be identical for the AND operator.
-class TestBooleanANDParticleQuantity(YTStaticOutputTest);
+class TestBooleanANDParticleQuantity(YTStaticOutputTest):
def run(self):
domain = self.pf.domain_right_edge - self.pf.domain_left_edge
four = 0.4 * domain + self.pf.domain_left_edge
@@ -109,7 +109,7 @@
return []
# OR
-class TestBooleanORParticleQuantity(YTStaticOutputTest);
+class TestBooleanORParticleQuantity(YTStaticOutputTest):
def run(self):
domain = self.pf.domain_right_edge - self.pf.domain_left_edge
four = 0.4 * domain + self.pf.domain_left_edge
@@ -132,7 +132,7 @@
return []
# NOT
-class TestBooleanNOTParticleQuantity(YTStaticOutputTest);
+class TestBooleanNOTParticleQuantity(YTStaticOutputTest):
def run(self):
domain = self.pf.domain_right_edge - self.pf.domain_left_edge
four = 0.4 * domain + self.pf.domain_left_edge
https://bitbucket.org/yt_analysis/yt/changeset/a732ec370ba7/
changeset: a732ec370ba7
branch: yt
user: sskory
date: 2011-11-02 23:17:06
summary: Fixing the setup for spatial so it compiles and is installed
on Linux. For some reason what I had before worked on Mac OS X,
but not Linux.
affected #: 2 files
diff -r 5358c43b84b8854d07b08004d5059d4a4e492201 -r a732ec370ba724b3be87670ae7143a09bcb15520 yt/utilities/setup.py
--- a/yt/utilities/setup.py
+++ b/yt/utilities/setup.py
@@ -148,8 +148,7 @@
config.add_subpackage("delaunay") # From SciPy, written by Robert Kern
config.add_subpackage("kdtree")
config.add_data_files(('kdtree', ['kdtree/fKDpy.so',]))
- config.add_extension('spatial', ["yt/utilities/spatial/ckdtree.pyx"],
- libraries=["m"])
+ config.add_subpackage("spatial")
config.add_subpackage("parallel_tools")
config.add_extension("data_point_utilities",
"yt/utilities/data_point_utilities.c", libraries=["m"])
diff -r 5358c43b84b8854d07b08004d5059d4a4e492201 -r a732ec370ba724b3be87670ae7143a09bcb15520 yt/utilities/spatial/setup.py
--- a/yt/utilities/spatial/setup.py
+++ b/yt/utilities/spatial/setup.py
@@ -34,7 +34,9 @@
# libraries=libs,
# **lapack)
- config.add_extension('ckdtree', sources=['ckdtree.c']) # FIXME: cython
+ config.add_extension('ckdtree', sources=['ckdtree.pyx'],
+ libraries=["m"],
+ include_dirs = [get_numpy_include_dirs()])
config.add_extension('_distance_wrap',
sources=[join('src', 'distance_wrap.c'), join('src', 'distance.c')],
Repository URL: https://bitbucket.org/yt_analysis/yt/
--
This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.
More information about the yt-svn mailing list