[Yt-svn] yt: Modifications of the merger tree.
hg at spacepope.org
hg at spacepope.org
Wed Mar 31 15:51:41 PDT 2010
hg Repository: yt
details: yt/rev/ce261b18ec2c
changeset: 1506:ce261b18ec2c
user: Stephen Skory <stephenskory at yahoo.com>
date: Wed Mar 31 15:51:34 2010 -0700
description:
Modifications of the merger tree.
- Now handles datasets with no halos.
- Faster halo membership comparisons. Now the slowest thing during that
part of the process is writing to the SQLite file, so I think it's
just about as fast as it will get.
- Now it uses the fortran kd tree.
Also a few changes to halo finding stuff.
diffstat:
yt/extensions/MergerTree.py | 353 ++++++++++++++++++++++++++++-----------
yt/lagos/HaloFinding.py | 24 +-
yt/lagos/ParallelTools.py | 45 +++++
yt/lagos/parallelHOP/parallelHOP.py | 3 +-
4 files changed, 311 insertions(+), 114 deletions(-)
diffs (truncated from 681 to 300 lines):
diff -r f2e555c03efe -r ce261b18ec2c yt/extensions/MergerTree.py
--- a/yt/extensions/MergerTree.py Wed Mar 31 10:16:28 2010 -0700
+++ b/yt/extensions/MergerTree.py Wed Mar 31 15:51:34 2010 -0700
@@ -24,7 +24,7 @@
"""
import yt.lagos as lagos
-from yt.lagos.kd import *
+from yt.extensions.kdtree import *
from yt.lagos.HaloFinding import HaloFinder
from yt.logger import lagosLogger as mylog
import yt.extensions.HaloProfiler as HP
@@ -41,7 +41,7 @@
"SnapCurrentTimeIdentifier":"INTEGER",
"SnapZ":"FLOAT",
"SnapHaloID":"INTEGER",
-"DarkMatterMass":"FLOAT",
+"HaloMass":"FLOAT",
"NumPart":"INTEGER",
"CenMassX":"FLOAT",
"CenMassY":"FLOAT",
@@ -63,7 +63,7 @@
# In order.
columns = ["GlobalHaloID", "SnapCurrentTimeIdentifier", "SnapZ",
-"SnapHaloID", "DarkMatterMass", "NumPart", "CenMassX", "CenMassY",
+"SnapHaloID", "HaloMass", "NumPart", "CenMassX", "CenMassY",
"CenMassZ", "BulkVelX", "BulkVelY", "BulkVelZ", "MaxRad",
"ChildHaloID0", "ChildHaloFrac0",
"ChildHaloID1", "ChildHaloFrac1",
@@ -94,6 +94,7 @@
FOF_link_length=0.2, dm_only=False, refresh=False, sleep=1,
index=True):
self.restart_files = restart_files # list of enzo restart files
+ self.with_halos = na.ones(len(restart_files), dtype='bool')
self.database = database # the sqlite database of haloes.
self.halo_finder_function = halo_finder_function # which halo finder to use
self.halo_finder_threshold = halo_finder_threshold # overdensity threshold
@@ -121,16 +122,19 @@
self._create_halo_table()
self._run_halo_finder_add_to_db()
# Find the h5 file names for all the halos.
- mylog.info("Opening HDF5 files.")
for snap in self.restart_files:
self._build_h5_refs(snap)
# Loop over the pairs of snapshots to locate likely neighbors, and
# then use those likely neighbors to compute fractional contributions.
- for pair in zip(self.restart_files[:-1], self.restart_files[1:]):
- self._build_h5_refs(pair[0])
- self._build_h5_refs(pair[1])
+ last = None
+ for snap, pair in enumerate(zip(self.restart_files[:-1], self.restart_files[1:])):
+ if not self.with_halos[snap] or not self.with_halos[snap+1]:
+ continue
self._find_likely_children(pair[0], pair[1])
- self._compute_child_fraction(pair[0], pair[1])
+ # last is the data for the parent dataset, which can be supplied
+ # as the child from the previous round for all but the first loop.
+ last = self._compute_child_fraction(pair[0], pair[1], last)
+ del last
if self.mine == 0 and index:
mylog.info("Creating database index.")
line = "CREATE INDEX IF NOT EXISTS HalosIndex ON Halos ("
@@ -140,7 +144,7 @@
self.cursor.execute(line)
self._barrier()
self._close_database()
- self._close_h5fp()
+ mylog.info("Done!")
def _read_halo_lists(self):
self.halo_lists = []
@@ -149,7 +153,7 @@
self.halo_lists.append(hp.all_halos)
def _run_halo_finder_add_to_db(self):
- for file in self.restart_files:
+ for cycle, file in enumerate(self.restart_files):
gc.collect()
pf = lagos.EnzoStaticOutput(file)
# If the halos are already found, skip this data step, unless
@@ -171,6 +175,10 @@
halos.write_out(os.path.join(dir, 'MergerHalos.out'))
halos.write_particle_lists(os.path.join(dir, 'MergerHalos'))
halos.write_particle_lists_txt(os.path.join(dir, 'MergerHalos'))
+ if len(halos) == 0:
+ mylog.info("Dataset %s has no halos." % file)
+ self.with_halos[cycle] = False
+ continue
del halos
# Now add halo data to the db if it isn't already there by
# checking the first halo.
@@ -185,6 +193,10 @@
# Read the halos off the disk using the Halo Profiler tools.
hp = HP.HaloProfiler(file, halo_list_file='MergerHalos.out',
halo_list_format={'id':0, 'mass':1, 'numpart':2, 'center':[7, 8, 9], 'velocity':[10, 11, 12], 'r_max':13})
+ if len(hp.all_halos) == 0:
+ mylog.info("Dataset %s has no halos." % file)
+ self.with_halos[cycle] = False
+ continue
mylog.info("Entering halos into database for z=%f" % red)
if self.mine == 0:
for ID,halo in enumerate(hp.all_halos):
@@ -252,7 +264,7 @@
# Create the table that will store the halo data.
line = "CREATE TABLE Halos (GlobalHaloID INTEGER PRIMARY KEY,\
SnapCurrentTimeIdentifier INTEGER, SnapZ FLOAT, SnapHaloID INTEGER, \
- DarkMatterMass FLOAT,\
+ HaloMass FLOAT,\
NumPart INTEGER, CenMassX FLOAT, CenMassY FLOAT,\
CenMassZ FLOAT, BulkVelX FLOAT, BulkVelY FLOAT, BulkVelZ FLOAT,\
MaxRad FLOAT,\
@@ -284,15 +296,17 @@
# Build the kdtree for the children by looping over the fetched rows.
child_points = []
for row in self.cursor:
- p = Point()
- p.data = [row[1], row[2], row[3]]
- p.ID = row[0]
- child_points.append(p)
- child_kd = buildKdHyperRectTree(child_points[:],10)
-
- # Make these just once.
- neighbors = Neighbors()
- neighbors.k = 5
+ child_points.append([row[1], row[2], row[3]])
+ # Turn it into fortran.
+ child_points = na.array(child_points)
+ fKD.pos = na.asfortranarray(child_points.T)
+ fKD.qv = na.empty(3, dtype='float64')
+ fKD.dist = na.empty(5, dtype='float64')
+ fKD.tags = na.empty(5, dtype='int64')
+ fKD.nn = 5
+ fKD.sort = True
+ fKD.rearrange = True
+ create_tree()
# Find the parent points from the database.
parent_pf = lagos.EnzoStaticOutput(parentfile)
@@ -305,13 +319,12 @@
# parents.
candidates = {}
for row in self.cursor:
- neighbors.points = []
- neighbors.minDistanceSquared = 100. # should make this a function of the simulation
- cm = [row[1], row[2], row[3]]
- getKNN(cm, child_kd, neighbors, 0., [1.]*3)
+ fKD.qv = na.array([row[1], row[2], row[3]])
+ find_nn_nearest_neighbors()
+ NNtags = fKD.tags[:] - 1
nIDs = []
- for n in neighbors.points:
- nIDs.append(n[1].ID)
+ for n in NNtags:
+ nIDs.append(n)
if len(nIDs) < 5:
# We need to fill in fake halos if there aren't enough halos,
# which can happen at high redshifts.
@@ -319,13 +332,26 @@
nIDs.append(-1)
candidates[row[0]] = nIDs
+ del fKD.pos, fKD.tags, fKD.dist
+ free_tree() # Frees the kdtree object.
+
self.candidates = candidates
+
+ # This stores the masses contributed to each child candidate.
+ self.child_mass_arr = na.zeros(len(candidates)*5, dtype='float64')
+ # Records where to put the entries in the above array.
+ self.child_mass_loc = defaultdict(dict)
+ for i,halo in enumerate(sorted(candidates)):
+ for j, child in enumerate(candidates[halo]):
+ self.child_mass_loc[halo][child] = i*5 + j
def _build_h5_refs(self, filename):
# For this snapshot, add lists of file names that contain the
# particle info for each halo.
if not hasattr(self, 'h5files'):
self.h5files = defaultdict(dict)
+ if not hasattr(self, 'names'):
+ self.names = defaultdict(set)
file_pf = lagos.EnzoStaticOutput(filename)
currt = file_pf['CurrentTimeIdentifier']
dir = os.path.dirname(filename)
@@ -337,21 +363,10 @@
line = line.strip().split()
self.h5files[currt][i] = line[1:]
names.update(line[1:])
+ self.names[currt].update(line[1:])
lines.close()
- # As long as we're here, open each of the h5 files and store the
- # pointer in a persistent dict.
- if not hasattr(self, 'h5fp'):
- self.h5fp = defaultdict(dict)
- for name in names:
- self.h5fp[currt][name] = h5py.File(name)
- def _close_h5fp(self):
- # Cleanly close the open h5 file pointers.
- for currt in self.h5fp:
- for name in self.h5fp[currt]:
- self.h5fp[currt][name].close()
-
- def _compute_child_fraction(self, parentfile, childfile):
+ def _compute_child_fraction(self, parentfile, childfile, last):
# Given a parent and child snapshot, and a list of child candidates,
# compute what fraction of the parent halo goes to each of the children.
@@ -363,90 +378,224 @@
mylog.info("Computing fractional contribututions of particles to z=%1.5f halos." % \
child_pf['CosmologyCurrentRedshift'])
- child_percents = {}
- for i,halo in enumerate(self.candidates):
- if i%self.size != self.mine:
- continue
- # Read in its particle IDs
+ if last == None:
+ # First we're going to read in the particles, haloIDs and masses from
+ # the parent dataset.
+ parent_names = list(self.names[parent_currt])
+ parent_names.sort()
parent_IDs = na.array([], dtype='int64')
parent_masses = na.array([], dtype='float64')
- for h5name in self.h5files[parent_currt][halo]:
- # Get the correct time dict entry, and then the correct h5 file
- # from that snapshot, and then choose this parent's halo
- # group, and then the particle IDs. How's that for a long reach?
- new_IDs = self.h5fp[parent_currt][h5name]['Halo%08d' % halo]['particle_index']
- new_masses = self.h5fp[parent_currt][h5name]['Halo%08d' % halo]['ParticleMassMsun']
- parent_IDs = na.concatenate((parent_IDs, new_IDs[:]))
- parent_masses = na.concatenate((parent_masses, new_masses[:]))
- # Loop over its children.
- temp_percents = {}
- for child in self.candidates[halo]:
- if child == -1:
- # If this is a fake child, record zero contribution and move
- # on.
- temp_percents[-1] = 0.
+ parent_halos = na.array([], dtype='int32')
+ for i,pname in enumerate(parent_names):
+ if i>=self.mine and i%self.size==self.mine:
+ h5fp = h5py.File(pname)
+ for group in h5fp:
+ gID = int(group[4:])
+ thisIDs = h5fp[group]['particle_index'][:]
+ thisMasses = h5fp[group]['ParticleMassMsun'][:]
+ parent_IDs = na.concatenate((parent_IDs, thisIDs))
+ parent_masses = na.concatenate((parent_masses, thisMasses))
+ parent_halos = na.concatenate((parent_halos,
+ na.ones(thisIDs.size, dtype='int32') * gID))
+ h5fp.close()
+
+ # Sort the arrays by particle index in ascending order.
+ sort = parent_IDs.argsort()
+ parent_IDs = parent_IDs[sort]
+ parent_masses = parent_masses[sort]
+ parent_halos = parent_halos[sort]
+ del sort
+ else:
+ # We can use old data and save disk reading.
+ (parent_IDs, parent_masses, parent_halos) = last
+ # Used to communicate un-matched particles.
+ parent_send = na.ones(parent_IDs.size, dtype='bool')
+
+ # Now get the child halo data.
+ child_names = list(self.names[child_currt])
+ child_names.sort()
+ child_IDs = na.array([], dtype='int64')
+ child_masses = na.array([], dtype='float64')
+ child_halos = na.array([], dtype='int32')
+ for i,cname in enumerate(child_names):
+ if i>=self.mine and i%self.size==self.mine:
+ h5fp = h5py.File(cname)
+ for group in h5fp:
+ gID = int(group[4:])
+ thisIDs = h5fp[group]['particle_index'][:]
+ thisMasses = h5fp[group]['ParticleMassMsun'][:]
+ child_IDs = na.concatenate((child_IDs, thisIDs))
+ child_masses = na.concatenate((child_masses, thisMasses))
+ child_halos = na.concatenate((child_halos,
+ na.ones(thisIDs.size, dtype='int32') * gID))
+ h5fp.close()
+
+ # Sort the arrays by particle index.
+ sort = child_IDs.argsort()
+ child_IDs = child_IDs[sort]
+ child_masses = child_masses[sort]
+ child_halos = child_halos[sort]
+ child_send = na.ones(child_IDs.size, dtype='bool')
+ del sort
+
+ # Parent IDs on the left, child IDs on the right. We skip down both
+ # columns matching IDs. If they are out of synch, the index(es) is/are
+ # advanced until they match up again.
+ left = 0
+ right = 0
+ while left < parent_IDs.size and right < child_IDs.size:
+ if parent_IDs[left] == child_IDs[right]:
More information about the yt-svn
mailing list