[yt-svn] commit/yt: MatthewTurk: Merged in brittonsmith/yt (pull request #1921)

commits-noreply at bitbucket.org
Wed Feb 3 09:15:13 PST 2016


1 new commit in yt:

https://bitbucket.org/yt_analysis/yt/commits/323a5fd343c2/
Changeset:   323a5fd343c2
Branch:      yt
User:        MatthewTurk
Date:        2016-02-03 17:15:07+00:00
Summary:     Merged in brittonsmith/yt (pull request #1921)

Adding halo data containers for gadget_fof frontend.
Affected #:  6 files

diff -r 9cd43900dce52ab4ff5f59d63b28a5bd6ccdd92c -r 323a5fd343c253c2211eb385d8f5a87a241ad7d1 doc/source/examining/loading_data.rst
--- a/doc/source/examining/loading_data.rst
+++ b/doc/source/examining/loading_data.rst
@@ -1232,10 +1232,9 @@
 
 yt has support for reading halo catalogs produced by Rockstar and the inline 
 FOF/SUBFIND halo finders of Gadget and OWLS.  The halo catalogs are treated as 
-particle datasets where each particle represents a single halo.  At this time, 
-yt does not have the ability to load the member particles for a given halo.  
-However, once loaded, further halo analysis can be performed using 
-:ref:`halo_catalog`.
+particle datasets where each particle represents a single halo.  Member particles
+for individual halos can be accessed through halo data containers.  Further halo
+analysis can be performed using :ref:`halo_catalog`.
 
 In the case where halo catalogs are written to multiple files, one must only 
 give the path to one of them.
@@ -1269,11 +1268,39 @@
    # x component of the spin
    print(ad["Subhalo", "SubhaloSpin_0"])
 
+Halo member particles are accessed by creating halo data containers from the
+halo type ("Group" or "Subhalo") and the halo id.  Scalar values for individual
+halos can be accessed through the same containers.  Halo containers also have
+mass, position, and velocity attributes.
+
+.. code-block:: python
+
+   halo = ds.halo("Group", 0)
+   # member particles for this halo
+   print halo["member_ids"]
+   # halo virial radius
+   print halo["Group_R_Crit200"]
+   # halo mass
+   print halo.mass
+
+Subhalo containers can be created using either their absolute ids or a tuple
+of the parent group id and the relative subgroup id.
+
+.. code-block:: python
+
+   # first subhalo of the first halo
+   subhalo = ds.halo("Subhalo", (0, 0))
+   # this subhalo's absolute id
+   print(subhalo.group_identifier)
+   # member particles
+   print(subhalo["member_ids"])
+
 OWLS FOF/SUBFIND
 ^^^^^^^^^^^^^^^^
 
 OWLS halo catalogs have a very similar structure to regular Gadget halo catalogs.  
-The two field types are "FOF" and "SUBFIND".
+The two field types are "FOF" and "SUBFIND".  At this time, halo member particles
+cannot be loaded.
 
 .. code-block:: python
 

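Taken together, the additions above support iterating over every subhalo of a
group.  A minimal sketch, assuming the g298 sample catalog used in the tests
below:

    import yt

    ds = yt.load("gadget_halos/data/groups_298/fof_subhalo_tab_298.0.hdf5")

    # main halo 0 and the number of subhalos it hosts
    halo = ds.halo("Group", 0)
    n_subs = int(halo["subhalo_number"])

    for sid in range(n_subs):
        # subhalos are addressed as (group id, relative subgroup id)
        subhalo = ds.halo("Subhalo", (0, sid))
        print(subhalo.mass, subhalo["member_ids"].size)
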
diff -r 9cd43900dce52ab4ff5f59d63b28a5bd6ccdd92c -r 323a5fd343c253c2211eb385d8f5a87a241ad7d1 yt/frontends/gadget_fof/api.py
--- a/yt/frontends/gadget_fof/api.py
+++ b/yt/frontends/gadget_fof/api.py
@@ -15,12 +15,19 @@
 #-----------------------------------------------------------------------------
 
 from .data_structures import \
-     GadgetFOFDataset
+    GadgetFOFParticleIndex, \
+    GadgetFOFHDF5File, \
+    GadgetFOFDataset, \
+    GadgetFOFHaloParticleIndex, \
+    GadgetFOFHaloDataset, \
+    GagdetFOFHaloContainer
 
 from .io import \
-     IOHandlerGadgetFOFHDF5
+    IOHandlerGadgetFOFHDF5, \
+    IOHandlerGadgetFOFHaloHDF5
 
 from .fields import \
-     GadgetFOFFieldInfo
+    GadgetFOFFieldInfo, \
+    GadgetFOFHaloFieldInfo
 
 from . import tests

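The re-exported names can be imported directly from the frontend's api module,
for example to type-check objects returned by the new ds.halo factory.  A
sketch, assuming the same sample catalog (the container class is spelled
GagdetFOFHaloContainer in this changeset):

    import yt
    from yt.frontends.gadget_fof.api import \
        GadgetFOFDataset, \
        GagdetFOFHaloContainer

    ds = yt.load("gadget_halos/data/groups_298/fof_subhalo_tab_298.0.hdf5")
    assert isinstance(ds, GadgetFOFDataset)

    # ds.halo is bound to the container class in _setup_classes
    halo = ds.halo("Group", 0)
    assert isinstance(halo, GagdetFOFHaloContainer)
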
diff -r 9cd43900dce52ab4ff5f59d63b28a5bd6ccdd92c -r 323a5fd343c253c2211eb385d8f5a87a241ad7d1 yt/frontends/gadget_fof/data_structures.py
--- a/yt/frontends/gadget_fof/data_structures.py
+++ b/yt/frontends/gadget_fof/data_structures.py
@@ -15,35 +15,44 @@
 #-----------------------------------------------------------------------------
 
 from collections import defaultdict
+from functools import partial
 from yt.utilities.on_demand_imports import _h5py as h5py
 import numpy as np
 import stat
-import glob
 import os
+import weakref
 
-from yt.funcs import only_on_root
-from yt.frontends.gadget_fof.fields import \
-    GadgetFOFFieldInfo
-
-from yt.utilities.cosmology import \
-    Cosmology
-from yt.utilities.exceptions import \
-    YTException
-from yt.utilities.logger import ytLogger as \
-    mylog
-from yt.geometry.particle_geometry_handler import \
-    ParticleIndex
+from yt.data_objects.data_containers import \
+    YTSelectionContainer
 from yt.data_objects.static_output import \
     Dataset, \
     ParticleFile
 from yt.frontends.gadget.data_structures import \
     _fix_unit_ordering
-
+from yt.frontends.gadget_fof.fields import \
+    GadgetFOFFieldInfo, \
+    GadgetFOFHaloFieldInfo
+from yt.funcs import \
+    only_on_root
+from yt.geometry.particle_geometry_handler import \
+    ParticleIndex
+from yt.utilities.cosmology import \
+    Cosmology
+from yt.utilities.logger import ytLogger as \
+    mylog
 
 class GadgetFOFParticleIndex(ParticleIndex):
     def __init__(self, ds, dataset_type):
         super(GadgetFOFParticleIndex, self).__init__(ds, dataset_type)
 
+    def _calculate_particle_count(self):
+        """
+        Calculate the total number of each type of particle.
+        """
+        self.particle_count = \
+          dict([(ptype, sum([d.total_particles[ptype] for d in self.data_files]))
+                 for ptype in self.ds.particle_types_raw])
+
     def _calculate_particle_index_starts(self):
         # Halo indices are not saved in the file, so we must count by hand.
         # File 0 has halos 0 to N_0 - 1, file 1 has halos N_0 to N_0 + N_1 - 1, etc.
@@ -57,9 +66,14 @@
                 particle_count[ptype] += data_file.total_particles[ptype]
             offset_count += data_file.total_offset
 
+        self._halo_index_start = \
+          dict([(ptype, np.array([data_file.index_start[ptype]
+                                  for data_file in self.data_files]))
+                for ptype in self.ds.particle_types_raw])
+
     def _calculate_file_offset_map(self):
-        # After the FOF  is performed, a load-balancing step redistributes halos 
-        # and then writes more fields.  Here, for each file, we create a list of 
+        # After the FOF is performed, a load-balancing step redistributes halos
+        # and then writes more fields.  Here, for each file, we create a list of
         # files which contain the rest of the redistributed particles.
         ifof = np.array([data_file.total_particles["Group"]
                          for data_file in self.data_files])
@@ -73,40 +87,54 @@
             data_file.offset_files = self.data_files[istart[i]: iend[i] + 1]
 
     def _detect_output_fields(self):
-        # TODO: Add additional fields
-        dsl = []
+        field_list = []
         units = {}
-        for dom in self.data_files:
-            fl, _units = self.io._identify_fields(dom)
+        found_fields = \
+          dict([(ptype, False)
+                for ptype, pnum in self.particle_count.items()
+                if pnum > 0])
+
+        for data_file in self.data_files:
+            fl, _units = self.io._identify_fields(data_file)
             units.update(_units)
-            dom._calculate_offsets(fl)
-            for f in fl:
-                if f not in dsl: dsl.append(f)
-        self.field_list = dsl
+            field_list.extend([f for f in fl if f not in field_list])
+            for ptype in found_fields:
+                found_fields[ptype] |= data_file.total_particles[ptype]
+            if all(found_fields.values()): break
+
+        self.field_list = field_list
         ds = self.dataset
-        ds.particle_types = tuple(set(pt for pt, ds in dsl))
-        # This is an attribute that means these particle types *actually*
-        # exist.  As in, they are real, in the dataset.
+        ds.particle_types = tuple(set(pt for pt, ds in field_list))
         ds.field_units.update(units)
-        ds.particle_types_raw = tuple(sorted(ds.particle_types))
-            
+        ds.particle_types_raw = ds.particle_types
+
     def _setup_geometry(self):
         super(GadgetFOFParticleIndex, self)._setup_geometry()
+        self._calculate_particle_count()
         self._calculate_particle_index_starts()
         self._calculate_file_offset_map()
-    
+
 class GadgetFOFHDF5File(ParticleFile):
     def __init__(self, ds, io, filename, file_id):
+        with h5py.File(filename, "r") as f:
+            self.header = \
+              dict((str(field), val)
+                   for field, val in f["Header"].attrs.items())
+            self.group_length_sum = f["Group/GroupLen"].value.sum() \
+              if "Group/GroupLen" in f else 0
+            self.group_subs_sum = f["Group/GroupNsubs"].value.sum() \
+              if "Group/GroupNsubs" in f else 0
+        self.total_ids = self.header["Nids_ThisFile"]
+        self.total_particles = \
+          {"Group": self.header["Ngroups_ThisFile"],
+           "Subhalo": self.header["Nsubgroups_ThisFile"]}
+        self.total_offset = 0 # I think this is no longer needed
         super(GadgetFOFHDF5File, self).__init__(ds, io, filename, file_id)
-        with h5py.File(filename, "r") as f:
-            self.header = dict((field, f.attrs[field]) \
-                               for field in f.attrs.keys())
-    
+
 class GadgetFOFDataset(Dataset):
     _index_class = GadgetFOFParticleIndex
     _file_class = GadgetFOFHDF5File
     _field_info_class = GadgetFOFFieldInfo
-    _suffix = ".hdf5"
 
     def __init__(self, filename, dataset_type="gadget_fof_hdf5",
                  n_ref=16, over_refine_factor=1,
@@ -122,13 +150,36 @@
             raise RuntimeError("units_override is not supported for GadgetFOFDataset. "+
                                "Use unit_base instead.")
         super(GadgetFOFDataset, self).__init__(filename, dataset_type,
-                                                 units_override=units_override)
+                                               units_override=units_override)
+
+    def add_field(self, *args, **kwargs):
+        super(GadgetFOFDataset, self).add_field(*args, **kwargs)
+        self._halos_ds.add_field(*args, **kwargs)
+
+    @property
+    def halos_field_list(self):
+        return self._halos_ds.field_list
+
+    @property
+    def halos_derived_field_list(self):
+        return self._halos_ds.derived_field_list
+
+    _instantiated_halo_ds = None
+    @property
+    def _halos_ds(self):
+        if self._instantiated_halo_ds is None:
+            self._instantiated_halo_ds = GadgetFOFHaloDataset(self)
+        return self._instantiated_halo_ds
+
+    def _setup_classes(self):
+        super(GadgetFOFDataset, self)._setup_classes()
+        self.halo = partial(GagdetFOFHaloContainer, ds=self._halos_ds)
 
     def _parse_parameter_file(self):
-        handle = h5py.File(self.parameter_filename, mode="r")
-        hvals = {}
-        hvals.update((str(k), v) for k, v in handle["/Header"].attrs.items())
-        hvals["NumFiles"] = hvals["NumFiles"]
+        with h5py.File(self.parameter_filename, "r") as f:
+            self.parameters = \
+              dict((str(field), val)
+                   for field, val in f["Header"].attrs.items())
 
         self.dimensionality = 3
         self.refine_by = 2
@@ -137,35 +188,29 @@
 
         # Set standard values
         self.domain_left_edge = np.zeros(3, "float64")
-        self.domain_right_edge = np.ones(3, "float64") * hvals["BoxSize"]
+        self.domain_right_edge = np.ones(3, "float64") * \
+          self.parameters["BoxSize"]
         nz = 1 << self.over_refine_factor
         self.domain_dimensions = np.ones(3, "int32") * nz
         self.cosmological_simulation = 1
         self.periodicity = (True, True, True)
-        self.current_redshift = hvals["Redshift"]
-        self.omega_lambda = hvals["OmegaLambda"]
-        self.omega_matter = hvals["Omega0"]
-        self.hubble_constant = hvals["HubbleParam"]
-
+        self.current_redshift = self.parameters["Redshift"]
+        self.omega_lambda = self.parameters["OmegaLambda"]
+        self.omega_matter = self.parameters["Omega0"]
+        self.hubble_constant = self.parameters["HubbleParam"]
         cosmology = Cosmology(hubble_constant=self.hubble_constant,
                               omega_matter=self.omega_matter,
                               omega_lambda=self.omega_lambda)
         self.current_time = cosmology.t_from_z(self.current_redshift)
 
-        self.parameters = hvals
         prefix = os.path.abspath(
             os.path.join(os.path.dirname(self.parameter_filename), 
                          os.path.basename(self.parameter_filename).split(".", 1)[0]))
-        
         suffix = self.parameter_filename.rsplit(".", 1)[-1]
         self.filename_template = "%s.%%(num)i.%s" % (prefix, suffix)
-        self.file_count = len(glob.glob(prefix + "*" + self._suffix))
-        if self.file_count == 0:
-            raise YTException(message="No data files found.", ds=self)
+        self.file_count = self.parameters["NumFiles"]
         self.particle_types = ("Group", "Subhalo")
         self.particle_types_raw = ("Group", "Subhalo")
-        
-        handle.close()
 
     def _set_code_unit_attributes(self):
         # Set a sane default for cosmological simulations.
@@ -223,6 +268,9 @@
             time_unit = (1., "s")        
         self.time_unit = self.quan(time_unit[0], time_unit[1])
 
+    def __repr__(self):
+        return self.basename.split(".", 1)[0]
+
     @classmethod
     def _is_valid(self, *args, **kwargs):
         need_groups = ['Group', 'Header', 'Subhalo']
@@ -237,3 +285,366 @@
             valid = False
             pass
         return valid
+
+class GadgetFOFHaloParticleIndex(GadgetFOFParticleIndex):
+    def __init__(self, ds, dataset_type):
+        self.real_ds = weakref.proxy(ds.real_ds)
+        super(GadgetFOFHaloParticleIndex, self).__init__(ds, dataset_type)
+
+    def _setup_geometry(self):
+        self._setup_data_io()
+
+        if self.real_ds._instantiated_index is None:
+            template = self.real_ds.filename_template
+            ndoms = self.real_ds.file_count
+            cls = self.real_ds._file_class
+            self.data_files = \
+              [cls(self.dataset, self.io, template % {'num':i}, i)
+               for i in range(ndoms)]
+        else:
+            self.data_files = self.real_ds.index.data_files
+
+        self._calculate_particle_index_starts()
+        self._calculate_particle_count()
+        self._create_halo_id_table()
+
+    def _create_halo_id_table(self):
+        """
+        Create a list of halo start ids so we know which file
+        contains particles for a given halo.  Note, the halo ids
+        are distributed over all files and so the ids for a given
+        halo are likely stored in a different file than the halo
+        itself.
+        """
+
+        self._halo_id_number = np.array([data_file.total_ids
+                            for data_file in self.data_files])
+        self._halo_id_end = self._halo_id_number.cumsum()
+        self._halo_id_start = self._halo_id_end - self._halo_id_number
+
+        self._group_length_sum = \
+          np.array([data_file.group_length_sum
+                    for data_file in self.data_files])
+
+    def _detect_output_fields(self):
+        field_list = []
+        scalar_field_list = []
+        units = {}
+        found_fields = \
+          dict([(ptype, False)
+                for ptype, pnum in self.particle_count.items()
+                if pnum > 0])
+
+        for data_file in self.data_files:
+            fl, sl, _units = self.io._identify_fields(data_file)
+            units.update(_units)
+            field_list.extend([f for f in fl
+                               if f not in field_list])
+            scalar_field_list.extend([f for f in sl
+                                      if f not in scalar_field_list])
+            for ptype in found_fields:
+                found_fields[ptype] |= data_file.total_particles[ptype]
+            if all(found_fields.values()): break
+
+        self.field_list = field_list
+        self.scalar_field_list = scalar_field_list
+        ds = self.dataset
+        ds.scalar_field_list = scalar_field_list
+        ds.particle_types = tuple(set(pt for pt, ds in field_list))
+        ds.field_units.update(units)
+        ds.particle_types_raw = ds.particle_types
+
+    def _identify_base_chunk(self, dobj):
+        pass
+
+    def _read_particle_fields(self, fields, dobj, chunk = None):
+        if len(fields) == 0: return {}, []
+        fields_to_read, fields_to_generate = self._split_fields(fields)
+        if len(fields_to_read) == 0:
+            return {}, fields_to_generate
+        fields_to_return = self.io._read_particle_selection(
+            dobj, fields_to_read)
+        return fields_to_return, fields_to_generate
+
+    def _get_halo_file_indices(self, ptype, identifiers):
+        return np.digitize(identifiers,
+            self._halo_index_start[ptype], right=False) - 1
+
+    def _get_halo_scalar_index(self, ptype, identifier):
+        i_scalar = self._get_halo_file_indices(ptype, [identifier])[0]
+        scalar_index = identifier - self._halo_index_start[ptype][i_scalar]
+        return scalar_index
+
+    def _get_halo_values(self, ptype, identifiers, fields,
+                         f=None):
+        """
+        Get field values for halos.  IDs are likely to be
+        sequential (or at least monotonic), but not necessarily
+        all within the same file.
+
+        This does not do much to minimize file i/o, but with
+        halos randomly distributed across files, there's not
+        much more we can do.
+        """
+
+        # if a file is already open, don't open it again
+        filename = None if f is None \
+          else f.filename
+
+        data = defaultdict(lambda: np.empty(identifiers.size))
+        i_scalars = self._get_halo_file_indices(ptype, identifiers)
+        for i_scalar in np.unique(i_scalars):
+            target = i_scalars == i_scalar
+            scalar_indices = identifiers - \
+              self._halo_index_start[ptype][i_scalar]
+
+            # only open file if it's not already open
+            my_f = f if self.data_files[i_scalar].filename == filename \
+              else h5py.File(self.data_files[i_scalar].filename, "r")
+
+            for field in fields:
+                data[field][target] = \
+                  my_f[os.path.join(ptype, field)].value[scalar_indices[target]]
+
+            if self.data_files[i_scalar].filename != filename: my_f.close()
+
+        return data
+
+class GadgetFOFHaloDataset(Dataset):
+    _index_class = GadgetFOFHaloParticleIndex
+    _file_class = GadgetFOFHDF5File
+    _field_info_class = GadgetFOFHaloFieldInfo
+
+    def __init__(self, ds, dataset_type="gadget_fof_halo_hdf5"):
+        self.real_ds = ds
+        self.particle_types_raw = self.real_ds.particle_types_raw
+        self.particle_types = self.particle_types_raw
+
+        super(GadgetFOFHaloDataset, self).__init__(
+            self.real_ds.parameter_filename, dataset_type)
+
+    def print_key_parameters(self):
+        pass
+
+    def _set_derived_attrs(self):
+        pass
+
+    def _parse_parameter_file(self):
+        for attr in ["cosmological_simulation", "cosmology",
+                     "current_redshift", "current_time",
+                     "dimensionality", "domain_dimensions",
+                     "domain_left_edge", "domain_right_edge",
+                     "domain_width", "hubble_constant",
+                     "omega_lambda", "omega_matter",
+                     "unique_identifier"]:
+            setattr(self, attr, getattr(self.real_ds, attr))
+
+    def set_code_units(self):
+        for unit in ["length", "time", "mass",
+                     "velocity", "magnetic", "temperature"]:
+            my_unit = "%s_unit" % unit
+            setattr(self, my_unit, getattr(self.real_ds, my_unit, None))
+        self.unit_registry = self.real_ds.unit_registry
+
+    def __repr__(self):
+        return "%s" % self.real_ds
+
+    def _setup_classes(self):
+        self.objects = []
+
+class GagdetFOFHaloContainer(YTSelectionContainer):
+    """
+    Create a data container to get member particles and individual
+    values from halos and subhalos.  Halo mass, position, and
+    velocity are set as attributes.  The ids of a halo's member
+    particles are accessible through the "member_ids" field.  Fields
+    with one value per halo are accessible as normal.  The field list
+    for halo objects can be seen in `ds.halos_field_list`.
+
+    Parameters
+    ----------
+    ptype : string
+        The type of halo, either "Group" for the main halo or
+        "Subhalo" for subhalos.
+    particle_identifier : int or tuple of (int, int)
+        The halo or subhalo id.  If requesting a subhalo, the id
+        can also be given as a tuple of the main halo id and
+        subgroup id, such as (1, 4) for subgroup 4 of halo 1.
+
+    Halo Container Attributes
+    -------------------------
+    particle_identifier : int
+        The id of the halo or subhalo.
+    group_identifier : int
+        For subhalos, the id of the enclosing halo.
+    subgroup_identifier : int
+        For subhalos, the relative id of the subhalo within
+        the enclosing halo.
+    particle_number : int
+        Number of particles in the halo.
+    mass : float
+        Halo mass.
+    position : array of floats
+        Halo position.
+    velocity : array of floats
+        Halo velocity.
+
+    Relevant Fields
+    ---------------
+    particle_number :
+        number of particles
+    subhalo_number :
+        number of subhalos
+    group_identifier :
+        id of parent group for subhalos
+
+    Examples
+    --------
+
+    >>> import yt
+    >>> ds = yt.load("gadget_halos/data/groups_298/fof_subhalo_tab_298.0.hdf5")
+    >>>
+    >>> halo = ds.halo("Group", 0)
+    >>> print(halo.mass)
+    13256.5517578 code_mass
+    >>> print(halo.position)
+    [ 16.18603706   6.95965052  12.52694607] code_length
+    >>> print(halo.velocity)
+    [ 6943694.22793569  -762788.90647454  -794749.63819757] cm/s
+    >>> print(halo["Group_R_Crit200"])
+    [ 0.79668683] code_length
+    >>>
+    >>> # particle ids for this halo
+    >>> print(halo["member_ids"])
+    [  723631.   690744.   854212. ...,   608589.   905551.  1147449.] dimensionless
+    >>>
+    >>> # get the first subhalo of this halo
+    >>> subhalo = ds.halo("Subhalo", (0, 0))
+    >>> print(subhalo["member_ids"])
+    [  723631.   690744.   854212. ...,   808362.   956359.  1248821.] dimensionless
+
+    """
+
+    _type_name = "halo"
+    _con_args = ("ptype", "particle_identifier")
+    _spatial = False
+
+    def __init__(self, ptype, particle_identifier, ds=None):
+        if ptype not in ds.particle_types_raw:
+            raise RuntimeError("Possible halo types are %s, supplied \"%s\"." %
+                               (ds.particle_types_raw, ptype))
+
+        self.ptype = ptype
+        self._current_particle_type = ptype
+        super(GagdetFOFHaloContainer, self).__init__(ds, {})
+
+        if ptype == "Subhalo" and isinstance(particle_identifier, tuple):
+            self.group_identifier, self.subgroup_identifier = \
+              particle_identifier
+            my_data = self.index._get_halo_values(
+                "Group", np.array([self.group_identifier]),
+                ["GroupFirstSub"])
+            self.particle_identifier = \
+              np.int64(my_data["GroupFirstSub"][0] + self.subgroup_identifier)
+        else:
+            self.particle_identifier = particle_identifier
+
+        if self.particle_identifier >= self.index.particle_count[ptype]:
+            raise RuntimeError("%s %d requested, but only %d %s objects exist." %
+                               (ptype, particle_identifier,
+                                self.index.particle_count[ptype], ptype))
+
+        # Find the file that has the scalar values for this halo.
+        i_scalar = self.index._get_halo_file_indices(
+            ptype, [self.particle_identifier])[0]
+        self.scalar_data_file = self.index.data_files[i_scalar]
+
+        # index within halo arrays that corresponds to this halo
+        self.scalar_index = self.index._get_halo_scalar_index(
+            ptype, self.particle_identifier)
+
+        halo_fields = ["%sLen" % ptype]
+        if ptype == "Subhalo": halo_fields.append("SubhaloGrNr")
+        my_data = self.index._get_halo_values(
+            ptype, np.array([self.particle_identifier]),
+            halo_fields)
+        self.particle_number = np.int64(my_data["%sLen" % ptype][0])
+
+        if ptype == "Group":
+            self.group_identifier = self.particle_identifier
+            id_offset = 0
+            # index of file that has scalar values for the group
+            g_scalar = i_scalar
+            group_index = self.scalar_index
+
+        # If a subhalo, find the index of the parent.
+        elif ptype == "Subhalo":
+            self.group_identifier = np.int64(my_data["SubhaloGrNr"][0])
+
+            # Find the file that has the scalar values for the parent group.
+            g_scalar = self.index._get_halo_file_indices(
+                "Group", [self.group_identifier])[0]
+
+            # index within halo arrays that corresponds to the parent group
+            group_index = self.index._get_halo_scalar_index(
+                "Group", self.group_identifier)
+
+            my_data = self.index._get_halo_values(
+                "Group", np.array([self.group_identifier]),
+                ["GroupNsubs", "GroupFirstSub"])
+            self.subgroup_identifier = self.particle_identifier - \
+              np.int64(my_data["GroupFirstSub"][0])
+            parent_subhalos = my_data["GroupNsubs"][0]
+
+            mylog.debug("Subhalo %d is subgroup %s of %d in group %d." % \
+                        (self.particle_identifier, self.subgroup_identifier,
+                        parent_subhalos, self.group_identifier))
+
+            # ids of the sibling subhalos that come before this one
+            if self.subgroup_identifier > 0:
+                sub_ids = np.arange(
+                    self.particle_identifier - self.subgroup_identifier,
+                    self.particle_identifier)
+                my_data = self.index._get_halo_values(
+                    "Subhalo", sub_ids, ["SubhaloLen"])
+                id_offset = my_data["SubhaloLen"].sum(dtype=np.int64)
+            else:
+                id_offset = 0
+
+        # Calculate the starting index for the member particles.
+        # First, add up all the particles in the earlier files.
+        all_id_start = self.index._group_length_sum[:g_scalar].sum(dtype=np.int64)
+
+        # Now add the halos in this file that come before.
+        with h5py.File(self.index.data_files[g_scalar].filename, "r") as f:
+            all_id_start += f["Group"]["GroupLen"][:group_index].sum(dtype=np.int64)
+
+        # Add the subhalo offset.
+        all_id_start += id_offset
+
+        # indices of first and last files containing member particles
+        i_start = np.digitize([all_id_start],
+                              self.index._halo_id_start,
+                              right=False)[0] - 1
+        i_end = np.digitize([all_id_start+self.particle_number],
+                            self.index._halo_id_end,
+                            right=True)[0]
+        self.field_data_files = self.index.data_files[i_start:i_end+1]
+
+        # starting and ending indices for each file containing particles
+        self.field_data_start = \
+          (all_id_start -
+           self.index._halo_id_start[i_start:i_end+1]).clip(min=0)
+        self.field_data_start = self.field_data_start.astype(np.int64)
+        self.field_data_end = \
+          (all_id_start + self.particle_number -
+           self.index._halo_id_start[i_start:i_end+1]).clip(
+               max=self.index._halo_id_number[i_start:i_end+1])
+        self.field_data_end = self.field_data_end.astype(np.int64)
+
+        for attr in ["mass", "position", "velocity"]:
+            setattr(self, attr, self[self.ptype, "particle_%s" % attr][0])
+
+    def __repr__(self):
+        return "%s_%s_%09d" % \
+          (self.ds, self.ptype, self.particle_identifier)

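The index bookkeeping above reduces to cumulative sums plus np.digitize: each
file's starting halo id is the running total of the counts of the files before
it, and digitizing an id against those starts yields the owning file.  A
self-contained numpy sketch with hypothetical per-file counts:

    import numpy as np

    # hypothetical halo counts for three catalog files
    counts = np.array([250, 175, 100])

    # mirrors _calculate_particle_index_starts: file i holds halo ids
    # in [index_start[i], index_start[i] + counts[i])
    index_start = np.concatenate(([0], counts.cumsum()[:-1]))

    def halo_file_index(identifier):
        # mirrors _get_halo_file_indices: last file whose start <= id
        return np.digitize([identifier], index_start, right=False)[0] - 1

    def halo_scalar_index(identifier):
        # mirrors _get_halo_scalar_index: offset within that file's arrays
        return identifier - index_start[halo_file_index(identifier)]

    assert halo_file_index(249) == 0 and halo_file_index(250) == 1
    assert halo_scalar_index(430) == 5  # halo 430 is entry 5 of file 2
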
diff -r 9cd43900dce52ab4ff5f59d63b28a5bd6ccdd92c -r 323a5fd343c253c2211eb385d8f5a87a241ad7d1 yt/frontends/gadget_fof/fields.py
--- a/yt/frontends/gadget_fof/fields.py
+++ b/yt/frontends/gadget_fof/fields.py
@@ -21,28 +21,70 @@
 p_units = "code_length"
 v_units = "code_velocity"
 
+_pnums = 6
+_type_fields = \
+  tuple(("%s%sType_%d" % (ptype, field, pnum), (units, [], None))
+        for pnum in range(_pnums)
+        for field, units in (("Mass", m_units), ("Len", p_units))
+        for ptype in ("Group", "Subhalo"))
+_sub_type_fields = \
+  tuple(("Subhalo%sType_%d" % (field, pnum), (units, [], None))
+        for pnum in range(_pnums)
+        for field, units in (("HalfmassRad", p_units),
+                             ("MassInHalfRad", m_units),
+                             ("MassInMaxRad", m_units),
+                             ("MassInRad", m_units)))
+
+_particle_fields = (
+    ("GroupPos_0",           (p_units, ["Group", "particle_position_x"], None)),
+    ("GroupPos_1",           (p_units, ["Group", "particle_position_y"], None)),
+    ("GroupPos_2",           (p_units, ["Group", "particle_position_z"], None)),
+    ("GroupVel_0",           (v_units, ["Group", "particle_velocity_x"], None)),
+    ("GroupVel_1",           (v_units, ["Group", "particle_velocity_y"], None)),
+    ("GroupVel_2",           (v_units, ["Group", "particle_velocity_z"], None)),
+    ("GroupMass",            (m_units, ["Group", "particle_mass"], None)),
+    ("GroupLen",             ("",      ["Group", "particle_number"], None)),
+    ("GroupNsubs",           ("",      ["Group", "subhalo_number"], None)),
+    ("GroupFirstSub",        ("",      [], None)),
+    ("Group_M_Crit200",      (m_units, [], None)),
+    ("Group_M_Crit500",      (m_units, [], None)),
+    ("Group_M_Mean200",      (m_units, [], None)),
+    ("Group_M_TopHat200",    (m_units, [], None)),
+    ("Group_R_Crit200",      (p_units, [], None)),
+    ("Group_R_Crit500",      (p_units, [], None)),
+    ("Group_R_Mean200",      (p_units, [], None)),
+    ("Group_R_TopHat200",    (p_units, [], None)),
+    ("SubhaloPos_0",         (p_units, ["Subhalo", "particle_position_x"], None)),
+    ("SubhaloPos_1",         (p_units, ["Subhalo", "particle_position_y"], None)),
+    ("SubhaloPos_2",         (p_units, ["Subhalo", "particle_position_z"], None)),
+    ("SubhaloVel_0",         (v_units, ["Subhalo", "particle_velocity_x"], None)),
+    ("SubhaloVel_1",         (v_units, ["Subhalo", "particle_velocity_y"], None)),
+    ("SubhaloVel_2",         (v_units, ["Subhalo", "particle_velocity_z"], None)),
+    ("SubhaloMass",          (m_units, ["Subhalo", "particle_mass"], None)),
+    ("SubhaloLen",           ("",      ["Subhalo", "particle_number"], None)),
+    ("SubhaloCM_0",          (p_units, ["Subhalo", "center_of_mass_x"], None)),
+    ("SubhaloCM_1",          (p_units, ["Subhalo", "center_of_mass_y"], None)),
+    ("SubhaloCM_2",          (p_units, ["Subhalo", "center_of_mass_z"], None)),
+    ("SubhaloSpin_0",        ("",      ["Subhalo", "spin_x"], None)),
+    ("SubhaloSpin_1",        ("",      ["Subhalo", "spin_y"], None)),
+    ("SubhaloSpin_2",        ("",      ["Subhalo", "spin_z"], None)),
+    ("SubhaloGrNr",          ("",      ["Subhalo", "group_identifier"], None)),
+    ("SubhaloHalfmassRad",   (p_units, [], None)),
+    ("SubhaloIDMostbound",   ("",      [], None)),
+    ("SubhaloMassInHalfRad", (m_units, [], None)),
+    ("SubhaloMassInMaxRad",  (m_units, [], None)),
+    ("SubhaloMassInRad",     (m_units, [], None)),
+    ("SubhaloParent",        ("",      [], None)),
+    ("SubhaloVelDisp",       (v_units, ["Subhalo", "velocity_dispersion"], None)),
+    ("SubhaloVmax",          (v_units, [], None)),
+    ("SubhaloVmaxRad",       (p_units, [], None)),
+) + _type_fields + _sub_type_fields
+
 class GadgetFOFFieldInfo(FieldInfoContainer):
     known_other_fields = (
     )
 
-    known_particle_fields = (
-        ("GroupPos_0", (p_units, ["Group", "particle_position_x"], None)),
-        ("GroupPos_1", (p_units, ["Group", "particle_position_y"], None)),
-        ("GroupPos_2", (p_units, ["Group", "particle_position_z"], None)),
-        ("GroupVel_0", (v_units, ["Group", "particle_velocity_x"], None)),
-        ("GroupVel_1", (v_units, ["Group", "particle_velocity_y"], None)),
-        ("GroupVel_2", (v_units, ["Group", "particle_velocity_z"], None)),
-        ("GroupMass",  (m_units, ["Group", "particle_mass"], None)),
-        ("GroupLen",   ("",      ["Group", "particle_number"], None)),
-        ("SubhaloPos_0", (p_units, ["Subhalo", "particle_position_x"], None)),
-        ("SubhaloPos_1", (p_units, ["Subhalo", "particle_position_y"], None)),
-        ("SubhaloPos_2", (p_units, ["Subhalo", "particle_position_z"], None)),
-        ("SubhaloVel_0", (v_units, ["Subhalo", "particle_velocity_x"], None)),
-        ("SubhaloVel_1", (v_units, ["Subhalo", "particle_velocity_y"], None)),
-        ("SubhaloVel_2", (v_units, ["Subhalo", "particle_velocity_z"], None)),
-        ("SubhaloMass",  (m_units, ["Subhalo", "particle_mass"], None)),
-        ("SubhaloLen",   ("",      ["Subhalo", "particle_number"], None)),
-    )
+    known_particle_fields = _particle_fields
 
     # these are extra fields to be created for the "all" particle type
     extra_union_fields = (
@@ -56,3 +98,10 @@
         ("",      "particle_number"),
         ("",      "particle_ones"),
     )
+
+class GadgetFOFHaloFieldInfo(FieldInfoContainer):
+    known_other_fields = (
+    )
+
+    known_particle_fields = _particle_fields + \
+      (("ID", ("", ["member_ids"], None)),)

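The nested generator expressions above fan each base field out over six
particle slots and both halo types.  Expanding the first few names makes the
pattern concrete (a sketch of _type_fields with the units elided):

    # the last for-clause varies fastest: ptype, then field, then pnum
    names = ["%s%sType_%d" % (ptype, field, pnum)
             for pnum in range(6)
             for field in ("Mass", "Len")
             for ptype in ("Group", "Subhalo")]
    print(names[:4])
    # ['GroupMassType_0', 'SubhaloMassType_0',
    #  'GroupLenType_0', 'SubhaloLenType_0']
    print(len(names))  # 24 = 6 slots x 2 fields x 2 halo types
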
diff -r 9cd43900dce52ab4ff5f59d63b28a5bd6ccdd92c -r 323a5fd343c253c2211eb385d8f5a87a241ad7d1 yt/frontends/gadget_fof/io.py
--- a/yt/frontends/gadget_fof/io.py
+++ b/yt/frontends/gadget_fof/io.py
@@ -14,16 +14,17 @@
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
+from collections import defaultdict
 from yt.utilities.on_demand_imports import _h5py as h5py
 import numpy as np
 
-from yt.utilities.exceptions import YTDomainOverflow
 from yt.funcs import mylog
-
+from yt.utilities.exceptions import \
+    YTDomainOverflow
 from yt.utilities.io_handler import \
     BaseIOHandler
-
-from yt.utilities.lib.geometry_utils import compute_morton
+from yt.utilities.lib.geometry_utils import \
+    compute_morton
 
 class IOHandlerGadgetFOFHDF5(BaseIOHandler):
     _dataset_type = "gadget_fof_hdf5"
@@ -55,7 +56,8 @@
 
     def _read_offset_particle_field(self, field, data_file, fh):
         field_data = np.empty(data_file.total_particles["Group"], dtype="float64")
-        fofindex = np.arange(data_file.total_particles["Group"]) + data_file.index_start["Group"]
+        fofindex = np.arange(data_file.total_particles["Group"]) + \
+          data_file.index_start["Group"]
         for offset_file in data_file.offset_files:
             if fh.filename == offset_file.filename:
                 ofh = fh
@@ -105,7 +107,6 @@
                                 field_data = f[ptype][fname].value.astype("float64")
                                 my_div = field_data.size / pcount
                                 if my_div > 1:
-                                    field_data = np.resize(field_data, (pcount, my_div))
                                     findex = int(field[field.rfind("_") + 1:])
                                     field_data = field_data[:, findex]
                         data = field_data[mask]
@@ -149,11 +150,7 @@
         return morton
 
     def _count_particles(self, data_file):
-        with h5py.File(data_file.filename, "r") as f:
-            pcount = {"Group": f["Header"].attrs["Ngroups_ThisFile"],
-                      "Subhalo": f["Header"].attrs["Nsubgroups_ThisFile"]}
-            data_file.total_offset = 0 # need to figure out how subfind works here
-            return pcount
+        return data_file.total_particles
 
     def _identify_fields(self, data_file):
         fields = []
@@ -169,6 +166,154 @@
                 self.offset_fields = self.offset_fields.union(set(my_offset_fields))
         return fields, {}
 
+class IOHandlerGadgetFOFHaloHDF5(IOHandlerGadgetFOFHDF5):
+    _dataset_type = "gadget_fof_halo_hdf5"
+
+    def _read_particle_coords(self, chunks, ptf):
+        pass
+
+    def _read_particle_selection(self, dobj, fields):
+        rv = {}
+        ind = {}
+        # We first need a set of masks for each particle type
+        ptf = defaultdict(list)        # ON-DISK TO READ
+        fsize = defaultdict(lambda: 0) # COUNT RV
+        field_maps = defaultdict(list) # ptypes -> fields
+        unions = self.ds.particle_unions
+        # What we need is a mapping from particle types to return types
+        for field in fields:
+            ftype, fname = field
+            fsize[field] = 0
+            # We should add a check for p.fparticle_unions or something here
+            if ftype in unions:
+                for pt in unions[ftype]:
+                    ptf[pt].append(fname)
+                    field_maps[pt, fname].append(field)
+            else:
+                ptf[ftype].append(fname)
+                field_maps[field].append(field)
+
+        # Now we allocate
+        psize = {dobj.ptype: dobj.particle_number}
+        for field in fields:
+            if field[0] in unions:
+                for pt in unions[field[0]]:
+                    fsize[field] += psize.get(pt, 0)
+            else:
+                fsize[field] += psize.get(field[0], 0)
+        for field in fields:
+            if field[1] in self._vector_fields:
+                shape = (fsize[field], self._vector_fields[field[1]])
+            elif field[1] in self._array_fields:
+                shape = (fsize[field],)+self._array_fields[field[1]]
+            elif field in self.ds.scalar_field_list:
+                shape = (1,)
+            else:
+                shape = (fsize[field], )
+            rv[field] = np.empty(shape, dtype="float64")
+            ind[field] = 0
+        # Now we read.
+        for field_r, vals in self._read_particle_fields(dobj, ptf):
+            # Note that we now need to check the mappings
+            for field_f in field_maps[field_r]:
+                my_ind = ind[field_f]
+                rv[field_f][my_ind:my_ind + vals.shape[0],...] = vals
+                ind[field_f] += vals.shape[0]
+        # Now we need to truncate all our fields, since we allow for
+        # over-estimating.
+        for field_f in ind:
+            rv[field_f] = rv[field_f][:ind[field_f]]
+        return rv
+
+    def _read_scalar_fields(self, dobj, scalar_fields):
+        all_data = {}
+        if not scalar_fields: return all_data
+        pcount = 1
+        with h5py.File(dobj.scalar_data_file.filename, "r") as f:
+            for ptype, field_list in sorted(scalar_fields.items()):
+                for field in field_list:
+                    if field == "particle_identifier":
+                        field_data = \
+                          np.arange(dobj.scalar_data_file.total_particles[ptype]) + \
+                          dobj.scalar_data_file.index_start[ptype]
+                    elif field in f[ptype]:
+                        field_data = f[ptype][field].value.astype("float64")
+                    else:
+                        fname = field[:field.rfind("_")]
+                        field_data = f[ptype][fname].value.astype("float64")
+                        my_div = field_data.size / pcount
+                        if my_div > 1:
+                            findex = int(field[field.rfind("_") + 1:])
+                            field_data = field_data[:, findex]
+                    data = np.array([field_data[dobj.scalar_index]])
+                    all_data[(ptype, field)] = data
+        return all_data
+
+    def _read_member_fields(self, dobj, member_fields):
+        all_data = defaultdict(lambda: np.empty(dobj.particle_number,
+                                                dtype=np.float64))
+        if not member_fields: return all_data
+        field_start = 0
+        for i, data_file in enumerate(dobj.field_data_files):
+            start_index = dobj.field_data_start[i]
+            end_index = dobj.field_data_end[i]
+            pcount = end_index - start_index
+            field_end = field_start + end_index - start_index
+            with h5py.File(data_file.filename, "r") as f:
+                for ptype, field_list in sorted(member_fields.items()):
+                    for field in field_list:
+                        field_data = all_data[(ptype, field)]
+                        if field in f["IDs"]:
+                            my_data = \
+                              f["IDs"][field][start_index:end_index].astype("float64")
+                        else:
+                            fname = field[:field.rfind("_")]
+                            my_data = \
+                              f["IDs"][fname][start_index:end_index].astype("float64")
+                            my_div = my_data.size / pcount
+                            if my_div > 1:
+                                findex = int(field[field.rfind("_") + 1:])
+                                my_data = my_data[:, findex]
+                        field_data[field_start:field_end] = my_data
+            field_start = field_end
+        return all_data
+
+    def _read_particle_fields(self, dobj, ptf):
+        # separate member particle fields from scalar fields
+        scalar_fields = defaultdict(list)
+        member_fields = defaultdict(list)
+        for ptype, field_list in sorted(ptf.items()):
+            for field in field_list:
+                if (ptype, field) in self.ds.scalar_field_list:
+                    scalar_fields[ptype].append(field)
+                else:
+                    member_fields[ptype].append(field)
+
+        all_data = self._read_scalar_fields(dobj, scalar_fields)
+        all_data.update(self._read_member_fields(dobj, member_fields))
+
+        for field, field_data in all_data.items():
+            yield field, field_data
+
+    def _identify_fields(self, data_file):
+        fields = []
+        scalar_fields = []
+        pcount = data_file.total_particles
+        if sum(pcount.values()) == 0: return fields, scalar_fields, {}
+        with h5py.File(data_file.filename, "r") as f:
+            for ptype in self.ds.particle_types_raw:
+                if data_file.total_particles[ptype] == 0: continue
+                fields.append((ptype, "particle_identifier"))
+                scalar_fields.append((ptype, "particle_identifier"))
+                my_fields, my_offset_fields = \
+                  subfind_field_list(f[ptype], ptype, data_file.total_particles)
+                fields.extend(my_fields)
+                scalar_fields.extend(my_fields)
+
+                if "IDs" not in f: continue
+                fields.extend([(ptype, field) for field in f["IDs"]])
+        return fields, scalar_fields, {}
+
 def subfind_field_list(fh, ptype, pcount):
     fields = []
     offset_fields = []
@@ -187,12 +332,12 @@
                         fields.append((ptype, "%s_%d" % (fname, i)))
                 else:
                     fields.append((ptype, fname))
-            elif ptype == "Subfind" and \
-              not fh[field].size % fh["/Subfind"].attrs["Number_of_groups"]:
+            elif ptype == "Subhalo" and \
+              not fh[field].size % fh["/Subhalo"].attrs["Number_of_groups"]:
                 # These are actually Group fields, but they were written after 
                 # a load balancing step moved halos around and thus they do not
                 # correspond to the halos stored in the Group group.
-                my_div = fh[field].size / fh["/Subfind"].attrs["Number_of_groups"]
+                my_div = fh[field].size / fh["/Subhalo"].attrs["Number_of_groups"]
                 fname = fh[field].name[fh[field].name.find(ptype) + len(ptype) + 1:]
                 if my_div > 1:
                     for i in range(int(my_div)):

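_read_member_fields walks the per-file slices computed in the halo container;
the start/end arithmetic with clip is easiest to see on a toy ID layout.  A
numpy sketch with hypothetical per-file member-ID counts:

    import numpy as np

    # hypothetical member-ID counts for three catalog files
    id_counts = np.array([1000, 800, 1200])
    id_start = np.concatenate(([0], id_counts.cumsum()[:-1]))  # [0, 1000, 1800]
    id_end = id_counts.cumsum()                                # [1000, 1800, 3000]

    # a halo whose members occupy global IDs [900, 2100)
    all_id_start, particle_number = 900, 1200

    # first and last files containing this halo's members
    i_start = np.digitize([all_id_start], id_start, right=False)[0] - 1
    i_end = np.digitize([all_id_start + particle_number], id_end, right=True)[0]

    # per-file slice bounds, clipped to each file's extent
    start = (all_id_start - id_start[i_start:i_end + 1]).clip(min=0)
    end = (all_id_start + particle_number -
           id_start[i_start:i_end + 1]).clip(max=id_counts[i_start:i_end + 1])
    # start -> [900, 0, 0]; end -> [1000, 800, 300]; 100 + 800 + 300 = 1200
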
diff -r 9cd43900dce52ab4ff5f59d63b28a5bd6ccdd92c -r 323a5fd343c253c2211eb385d8f5a87a241ad7d1 yt/frontends/gadget_fof/tests/test_outputs.py
--- a/yt/frontends/gadget_fof/tests/test_outputs.py
+++ b/yt/frontends/gadget_fof/tests/test_outputs.py
@@ -13,15 +13,18 @@
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
-import os.path
+import numpy as np
+
+from yt.frontends.gadget_fof.api import \
+    GadgetFOFDataset
 from yt.testing import \
     requires_file, \
-    assert_equal
+    assert_equal, \
+    assert_array_equal
 from yt.utilities.answer_testing.framework import \
     FieldValuesTest, \
     requires_ds, \
     data_dir_load
-from yt.frontends.gadget_fof.api import GadgetFOFDataset
 
 _fields = ("particle_position_x", "particle_position_y",
            "particle_position_z", "particle_velocity_x",
@@ -35,19 +38,51 @@
 
 @requires_ds(g5)
 def test_fields_g5():
-    ds = data_dir_load(g5)
-    yield assert_equal, str(ds), os.path.basename(g5)
     for field in _fields:
         yield FieldValuesTest(g5, field, particle_type=True)
 
 
 @requires_ds(g42)
 def test_fields_g42():
-    ds = data_dir_load(g42)
-    yield assert_equal, str(ds), os.path.basename(g42)
     for field in _fields:
         yield FieldValuesTest(g42, field, particle_type=True)
 
 @requires_file(g42)
 def test_GadgetFOFDataset():
     assert isinstance(data_dir_load(g42), GadgetFOFDataset)
+
+# fof/subhalo catalog with member particles
+g298 = "gadget_halos/data/groups_298/fof_subhalo_tab_298.0.hdf5"
+
+@requires_file(g298)
+def test_subhalos():
+    ds = data_dir_load(g298)
+    total_sub = 0
+    total_int = 0
+    for hid in range(0, ds.index.particle_count["Group"]):
+        my_h = ds.halo("Group", hid)
+        h_ids = my_h["ID"]
+        for sid in range(my_h["subhalo_number"]):
+            my_s = ds.halo("Subhalo", (my_h.particle_identifier, sid))
+            total_sub += my_s["ID"].size
+            total_int += np.intersect1d(h_ids, my_s["ID"]).size
+
+    # Test that all subhalo particles are contained within
+    # their parent group.
+    yield assert_equal, total_sub, total_int
+
+@requires_file(g298)
+def test_halo_masses():
+    ds = data_dir_load(g298)
+    ad = ds.all_data()
+    for ptype in ["Group", "Subhalo"]:
+        nhalos = ds.index.particle_count[ptype]
+        mass = ds.arr(np.zeros(nhalos), "code_mass")
+        for i in range(nhalos):
+            halo = ds.halo(ptype, i)
+            mass[i] = halo.mass
+
+        # Check that masses from halo containers are the same
+        # as the array of all masses.  This will test getting
+        # scalar fields for halos correctly.
+        yield assert_array_equal, ad[ptype, "particle_mass"], mass

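test_subhalos leans on np.intersect1d, which returns the sorted unique values
common to both arrays; with unique particle IDs, equal sizes therefore imply
full containment.  In miniature:

    import numpy as np

    group_ids = np.array([5., 1., 9., 7., 3.])
    sub_ids = np.array([9., 1., 3.])
    # all three subhalo IDs appear in the parent group
    assert np.intersect1d(group_ids, sub_ids).size == sub_ids.size
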
Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.


