[yt-svn] commit/yt: 5 new changesets

commits-noreply at bitbucket.org commits-noreply at bitbucket.org
Tue May 16 13:24:37 PDT 2017


5 new commits in yt:

https://bitbucket.org/yt_analysis/yt/commits/52399a9e8d85/
Changeset:   52399a9e8d85
User:        WeiguangCui
Date:        2017-05-01 18:11:21+00:00
Summary:     Backporting PR 2534 from Bitbucket
Affected #:  3 files

diff -r 3eca2ae80ab14a48b643d3055d7d3c0933fa77ae -r 52399a9e8d85d9200d001d3492d11c2ee421465a yt/frontends/gadget/data_structures.py
--- a/yt/frontends/gadget/data_structures.py
+++ b/yt/frontends/gadget/data_structures.py
@@ -42,29 +42,39 @@
 from .fields import \
     GadgetFieldInfo
 
+
 def _fix_unit_ordering(unit):
     if isinstance(unit[0], string_types):
         unit = unit[1], unit[0]
     return unit
-    
+
+
 def _get_gadget_format(filename):
-    # check and return gadget binary format
-    f = open(filename, 'rb')
-    (rhead,) = struct.unpack('<I',f.read(4))
-    f.close()
-    if (rhead == 134217728) | (rhead == 8):
-        return 2
-    elif (rhead == 65536) | (rhead == 256):
-        return 1
+    # check and return gadget binary format with file endianness
+    ff = open(filename, 'rb')
+    (rhead,) = struct.unpack('<I', ff.read(4))
+    ff.close()
+    if (rhead == 134217728):
+        return 2, '>'
+    elif (rhead == 8):
+        return 2, '<'
+    elif (rhead == 65536):
+        return 1, '>'
+    elif (rhead == 256):
+        return 1, '<'
     else:
         raise RuntimeError("Incorrect Gadget format %s!" % str(rhead))
 
+
 class GadgetBinaryFile(ParticleFile):
     def __init__(self, ds, io, filename, file_id):
+        gformat = _get_gadget_format(filename)
         with open(filename, "rb") as f:
-            if _get_gadget_format(filename) == 2:
-                f.seek(f.tell()+16)
-            self.header = read_record(f, ds._header_spec)
+            if gformat[0] == 2:
+                f.seek(f.tell() + 16)
+            self.header = read_record(f, ds._header_spec, endian=gformat[1])
+            if gformat[0] == 2:
+                f.seek(f.tell() + 16)
             self._position_offset = f.tell()
             f.seek(0, os.SEEK_END)
             self._file_size = f.tell()
@@ -76,6 +86,7 @@
             field_list, self.total_particles,
             self._position_offset, self._file_size)
 
+
 class GadgetDataset(SPHDataset):
     _index_class = ParticleIndex
     _file_class = GadgetBinaryFile
@@ -91,13 +102,14 @@
                  over_refine_factor=1,
                  kernel_name=None,
                  index_ptype="all",
-                 bounding_box = None,
-                 header_spec = "default",
-                 field_spec = "default",
-                 ptype_spec = "default",
+                 bounding_box=None,
+                 header_spec="default",
+                 field_spec="default",
+                 ptype_spec="default",
                  units_override=None,
                  unit_system="cgs"):
-        if self._instantiated: return
+        if self._instantiated:
+            return
         self._header_spec = self._setup_binary_spec(
             header_spec, gadget_header_specs)
         self._field_spec = self._setup_binary_spec(
@@ -115,12 +127,12 @@
             bbox = np.array(bounding_box, dtype="float64")
             if bbox.shape == (2, 3):
                 bbox = bbox.transpose()
-            self.domain_left_edge = bbox[:,0]
-            self.domain_right_edge = bbox[:,1]
+            self.domain_left_edge = bbox[:, 0]
+            self.domain_right_edge = bbox[:, 1]
         else:
             self.domain_left_edge = self.domain_right_edge = None
         if units_override is not None:
-            raise RuntimeError("units_override is not supported for GadgetDataset. "+
+            raise RuntimeError("units_override is not supported for GadgetDataset. " +
                                "Use unit_base instead.")
         super(GadgetDataset, self).__init__(
             filename, dataset_type=dataset_type, unit_system=unit_system,
@@ -149,11 +161,11 @@
     def _get_hvals(self):
         # The entries in this header are capitalized and named to match Table 4
         # in the GADGET-2 user guide.
-
+        gformat = _get_gadget_format(self.parameter_filename)
         f = open(self.parameter_filename, 'rb')
-        if _get_gadget_format(self.parameter_filename) == 2:
-            f.seek(f.tell()+16)
-        hvals = read_record(f, self._header_spec)
+        if gformat[0] == 2:
+            f.seek(f.tell() + 16)
+        hvals = read_record(f, self._header_spec, endian=gformat[1])
         for i in hvals:
             if len(hvals[i]) == 1:
                 hvals[i] = hvals[i][0]
@@ -191,7 +203,8 @@
         # It may be possible to deduce whether ComovingIntegration is on
         # somehow, but opinions on this vary.
         if self.omega_lambda == 0.0:
-            only_on_root(mylog.info, "Omega Lambda is 0.0, so we are turning off Cosmology.")
+            only_on_root(
+                mylog.info, "Omega Lambda is 0.0, so we are turning off Cosmology.")
             self.hubble_constant = 1.0  # So that scaling comes out correct
             self.cosmological_simulation = 0
             self.current_redshift = 0.0
@@ -221,14 +234,17 @@
         self.file_count = hvals["NumFiles"]
 
     def _set_code_unit_attributes(self):
-        # If no units passed in by user, set a sane default (Gadget-2 users guide).
+        # If no units passed in by user, set a sane default (Gadget-2 users
+        # guide).
         if self._unit_base is None:
             if self.cosmological_simulation == 1:
-                only_on_root(mylog.info, "Assuming length units are in kpc/h (comoving)")
-                self._unit_base = dict(length = (1.0, "kpccm/h"))
+                only_on_root(
+                    mylog.info, "Assuming length units are in kpc/h (comoving)")
+                self._unit_base = dict(length=(1.0, "kpccm/h"))
             else:
-                only_on_root(mylog.info, "Assuming length units are in kpc (physical)")
-                self._unit_base = dict(length = (1.0, "kpc"))
+                only_on_root(
+                    mylog.info, "Assuming length units are in kpc (physical)")
+                self._unit_base = dict(length=(1.0, "kpc"))
 
         # If units passed in by user, decide what to do about
         # co-moving and factors of h
@@ -305,10 +321,10 @@
         is a Gadget binary file, and endianswap is the endianness character '>' or '<'.
         '''
         try:
-            f = open(filename,'rb')
+            f = open(filename, 'rb')
         except IOError:
             try:
-                f = open(filename+".0")
+                f = open(filename + ".0")
             except IOError:
                 return False, 1
 
@@ -318,7 +334,7 @@
         # The int32 following the header (first 4+256 bytes) must equal this
         # number.
         try:
-            (rhead,) = struct.unpack('<I',f.read(4))
+            (rhead,) = struct.unpack('<I', f.read(4))
         except struct.error:
             f.close()
             return False, 1
@@ -338,11 +354,11 @@
             f.close()
             return False, 1
         # Read in particle number from header
-        np0 = sum(struct.unpack(endianswap+'IIIIII',f.read(6*4)))
+        np0 = sum(struct.unpack(endianswap + 'IIIIII', f.read(6 * 4)))
         # Read in size of position block. It should be 4 bytes per float,
         # with 3 coordinates (x,y,z) per particle. (12 bytes per particle)
-        f.seek(4+256+4,0)
-        np1 = struct.unpack(endianswap+'I',f.read(4))[0]/(4*3)
+        f.seek(4 + 256 + 4, 0)
+        np1 = struct.unpack(endianswap + 'I', f.read(4))[0] / (4 * 3)
         f.close()
         # Compare
         if np0 == np1:
@@ -355,6 +371,7 @@
         # First 4 bytes used to check load
         return GadgetDataset._validate_header(args[0])[0]
 
+
 class GadgetHDF5Dataset(GadgetDataset):
     _file_class = ParticleFile
     _field_info_class = GadgetFieldInfo
@@ -362,17 +379,17 @@
     _suffix = ".hdf5"
 
     def __init__(self, filename, dataset_type="gadget_hdf5",
-                 unit_base = None, n_ref=64,
+                 unit_base=None, n_ref=64,
                  over_refine_factor=1,
                  kernel_name=None,
                  index_ptype="all",
-                 bounding_box = None,
+                 bounding_box=None,
                  units_override=None,
                  unit_system="cgs"):
         self.storage_filename = None
         filename = os.path.abspath(filename)
         if units_override is not None:
-            raise RuntimeError("units_override is not supported for GadgetHDF5Dataset. "+
+            raise RuntimeError("units_override is not supported for GadgetHDF5Dataset. " +
                                "Use unit_base instead.")
         super(GadgetHDF5Dataset, self).__init__(
             filename, dataset_type, unit_base=unit_base, n_ref=n_ref,
@@ -397,8 +414,6 @@
         handle.close()
         return uvals
 
-
-
     def _set_owls_eagle(self):
 
         self.dimensionality = 3
@@ -417,7 +432,8 @@
 
         if self.domain_left_edge is None:
             self.domain_left_edge = np.zeros(3, "float64")
-            self.domain_right_edge = np.ones(3, "float64") * self.parameters["BoxSize"]
+            self.domain_right_edge = np.ones(
+                3, "float64") * self.parameters["BoxSize"]
 
         nz = 1 << self.over_refine_factor
         self.domain_dimensions = np.ones(3, "int32") * nz
@@ -441,9 +457,11 @@
 
         # note the contents of the HDF5 Units group are in _unit_base
         # note the velocity stored on disk is sqrt(a) dx/dt
-        self.length_unit = self.quan(self._unit_base["UnitLength_in_cm"], 'cmcm/h')
+        self.length_unit = self.quan(
+            self._unit_base["UnitLength_in_cm"], 'cmcm/h')
         self.mass_unit = self.quan(self._unit_base["UnitMass_in_g"], 'g/h')
-        self.velocity_unit = self.quan(self._unit_base["UnitVelocity_in_cm_per_s"], 'cm/s')
+        self.velocity_unit = self.quan(
+            self._unit_base["UnitVelocity_in_cm_per_s"], 'cm/s')
         self.time_unit = self.quan(self._unit_base["UnitTime_in_s"], 's/h')
 
     @classmethod
@@ -454,7 +472,7 @@
         try:
             fh = h5py.File(args[0], mode='r')
             valid = all(ng in fh["/"] for ng in need_groups) and \
-              not any(vg in fh["/"] for vg in veto_groups)
+                not any(vg in fh["/"] for vg in veto_groups)
             fh.close()
         except:
             valid = False

diff -r 3eca2ae80ab14a48b643d3055d7d3c0933fa77ae -r 52399a9e8d85d9200d001d3492d11c2ee421465a yt/frontends/gadget/io.py
--- a/yt/frontends/gadget/io.py
+++ b/yt/frontends/gadget/io.py
@@ -39,8 +39,7 @@
     _known_ptypes = gadget_hdf5_ptypes
     _var_mass = None
     _element_names = ('Hydrogen', 'Helium', 'Carbon', 'Nitrogen', 'Oxygen',
-                       'Neon', 'Magnesium', 'Silicon', 'Iron' )
-
+                      'Neon', 'Magnesium', 'Silicon', 'Iron')
 
     @property
     def var_mass(self):
@@ -68,9 +67,9 @@
             for ptype, field_list in sorted(ptf.items()):
                 if data_file.total_particles[ptype] == 0:
                     continue
-                x = f["/%s/Coordinates" % ptype][:,0].astype("float64")
-                y = f["/%s/Coordinates" % ptype][:,1].astype("float64")
-                z = f["/%s/Coordinates" % ptype][:,2].astype("float64")
+                x = f["/%s/Coordinates" % ptype][:, 0].astype("float64")
+                y = f["/%s/Coordinates" % ptype][:, 1].astype("float64")
+                z = f["/%s/Coordinates" % ptype][:, 2].astype("float64")
                 yield ptype, (x, y, z)
             f.close()
 
@@ -88,28 +87,29 @@
                 g = f["/%s" % ptype]
                 coords = g["Coordinates"][:].astype("float64")
                 mask = selector.select_points(
-                            coords[:,0], coords[:,1], coords[:,2], 0.0)
+                    coords[:, 0], coords[:, 1], coords[:, 2], 0.0)
                 del coords
-                if mask is None: continue
+                if mask is None:
+                    continue
                 for field in field_list:
 
                     if field in ("Mass", "Masses") and \
-                        ptype not in self.var_mass:
+                            ptype not in self.var_mass:
                         data = np.empty(mask.sum(), dtype="float64")
                         ind = self._known_ptypes.index(ptype)
                         data[:] = self.ds["Massarr"][ind]
 
                     elif field in self._element_names:
                         rfield = 'ElementAbundance/' + field
-                        data = g[rfield][:][mask,...]
+                        data = g[rfield][:][mask, ...]
                     elif field.startswith("Metallicity_"):
                         col = int(field.rsplit("_", 1)[-1])
-                        data = g["Metallicity"][:,col][mask]
+                        data = g["Metallicity"][:, col][mask]
                     elif field.startswith("Chemistry_"):
                         col = int(field.rsplit("_", 1)[-1])
-                        data = g["ChemistryAbundances"][:,col][mask]
+                        data = g["ChemistryAbundances"][:, col][mask]
                     else:
-                        data = g[field][:][mask,...]
+                        data = g[field][:][mask, ...]
 
                     yield (ptype, field), data
             f.close()
@@ -127,16 +127,18 @@
         morton = np.empty(pcount, dtype='uint64')
         ind = 0
         for key in keys:
-            if not key.startswith("PartType"): continue
-            if "Coordinates" not in f[key]: continue
+            if not key.startswith("PartType"):
+                continue
+            if "Coordinates" not in f[key]:
+                continue
             ds = f[key]["Coordinates"]
-            dt = ds.dtype.newbyteorder("N") # Native
+            dt = ds.dtype.newbyteorder("N")  # Native
             pos = np.empty(ds.shape, dtype=dt)
             pos[:] = ds
             regions.add_data_file(pos, data_file.file_id,
                                   data_file.ds.filter_bbox)
-            morton[ind:ind+pos.shape[0]] = compute_morton(
-                pos[:,0], pos[:,1], pos[:,2],
+            morton[ind:ind + pos.shape[0]] = compute_morton(
+                pos[:, 0], pos[:, 1], pos[:, 2],
                 data_file.ds.domain_left_edge,
                 data_file.ds.domain_right_edge,
                 data_file.ds.filter_bbox)
@@ -151,7 +153,6 @@
         npart = dict(("PartType%s" % (i), v) for i, v in enumerate(pcount))
         return npart
 
-
     def _identify_fields(self, data_file):
         f = h5py.File(data_file.filename, "r")
         fields = []
@@ -205,8 +206,10 @@
         f.close()
         return fields, {}
 
+
 ZeroMass = object()
 
+
 class IOHandlerGadgetBinary(BaseIOHandler):
     _dataset_type = "gadget_binary"
     _vector_fields = (("Coordinates", 3),
@@ -231,13 +234,17 @@
     #   TSTP    (only if enabled in makefile)
 
     _var_mass = None
+    _format = None
 
     def __init__(self, ds, *args, **kwargs):
         self._vector_fields = dict(self._vector_fields)
         self._fields = ds._field_spec
         self._ptypes = ds._ptype_spec
         self.data_files = set([])
-        self._format =  _get_gadget_format(ds.parameter_filename)#default gadget format 1
+        gformat = _get_gadget_format(ds.parameter_filename)
+        # gadget format 1 original, 2 with block name
+        self._format = gformat[0]
+        self._endian = gformat[1]
         super(IOHandlerGadgetBinary, self).__init__(ds, *args, **kwargs)
 
     @property
@@ -266,8 +273,8 @@
                 # This is where we could implement sub-chunking
                 f.seek(poff[ptype, "Coordinates"], os.SEEK_SET)
                 pos = self._read_field_from_file(f,
-                            tp[ptype], "Coordinates")
-                yield ptype, (pos[:,0], pos[:,1], pos[:,2])
+                                                 tp[ptype], "Coordinates")
+                yield ptype, (pos[:, 0], pos[:, 1], pos[:, 2])
             f.close()
 
     def _read_particle_fields(self, chunks, ptf, selector):
@@ -282,11 +289,12 @@
             for ptype, field_list in sorted(ptf.items()):
                 f.seek(poff[ptype, "Coordinates"], os.SEEK_SET)
                 pos = self._read_field_from_file(f,
-                            tp[ptype], "Coordinates")
+                                                 tp[ptype], "Coordinates")
                 mask = selector.select_points(
-                    pos[:,0], pos[:,1], pos[:,2], 0.0)
+                    pos[:, 0], pos[:, 1], pos[:, 2], 0.0)
                 del pos
-                if mask is None: continue
+                if mask is None:
+                    continue
                 for field in field_list:
                     if field == "Mass" and ptype not in self.var_mass:
                         data = np.empty(mask.sum(), dtype="float64")
@@ -297,65 +305,97 @@
                         continue
                     f.seek(poff[ptype, field], os.SEEK_SET)
                     data = self._read_field_from_file(f, tp[ptype], field)
-                    data = data[mask,...]
+                    data = data[mask, ...]
                     yield (ptype, field), data
             f.close()
 
     def _read_field_from_file(self, f, count, name):
-        if count == 0: return
+        if count == 0:
+            return
         if name == "ParticleIDs":
-            dt = "uint32"
+            dt = self._endian + "u4"
         else:
-            dt = "float32"
+            dt = self._endian + "f4"
         if name in self._vector_fields:
             count *= self._vector_fields[name]
-        arr = np.fromfile(f, dtype=dt, count = count)
+        arr = np.fromfile(f, dtype=dt, count=count)
         if name in self._vector_fields:
             factor = self._vector_fields[name]
-            arr = arr.reshape((count//factor, factor), order="C")
+            arr = arr.reshape((count // factor, factor), order="C")
         return arr.astype("float64")
 
     def _initialize_index(self, data_file, regions):
-        count = sum(data_file.total_particles.values())
         DLE = data_file.ds.domain_left_edge
         DRE = data_file.ds.domain_right_edge
-        with open(data_file.filename, "rb") as f:
-            # We add on an additionally 4 for the first record.
-            f.seek(data_file._position_offset + 4)
-            # The first total_particles * 3 values are positions
-            pp = np.fromfile(f, dtype = 'float32', count = count*3)
-            pp.shape = (count, 3)
-        regions.add_data_file(pp, data_file.file_id, data_file.ds.filter_bbox)
-        morton = compute_morton(pp[:,0], pp[:,1], pp[:,2], DLE, DRE,
-                                data_file.ds.filter_bbox)
-        return morton
+        if self.index_ptype == "all":
+            count = sum(data_file.total_particles.values())
+            with open(data_file.filename, "rb") as f:
+                # We add on an additionally 4 for the first record.
+                f.seek(data_file._position_offset + 4)
+                # The first total_particles * 3 values are positions
+                pp = np.fromfile(f, dtype=self._endian + 'f4', count=count * 3)
+                pp.shape = (count, 3)
+                pp = pp.astype(np.float32)
+            regions.add_data_file(pp, data_file.file_id,
+                                  data_file.ds.filter_bbox)
+            morton = compute_morton(pp[:, 0], pp[:, 1], pp[:, 2], DLE, DRE,
+                                    data_file.ds.filter_bbox)
+            return morton
+        else:
+            idpos = self._ptypes.index(self.index_ptype)
+            count = data_file.total_particles.get(self.index_ptype)
+            account = [0, data_file.total_particles.get(self._ptypes[0]),
+                       data_file.total_particles.get(self._ptypes[1]),
+                       data_file.total_particles.get(self._ptypes[2]),
+                       data_file.total_particles.get(self._ptypes[3]),
+                       data_file.total_particles.get(self._ptypes[4])]
+            account = np.cumsum(account)
+            with open(data_file.filename, "rb") as f:
+                # We add on an additionally 4 for the first record and skip
+                # unwanted particles.
+                f.seek(data_file._position_offset + 4 + account[idpos] * 12)
+                # The first total_particles * 3 values are positions
+                pp = np.fromfile(f, dtype=self._endian + 'f4', count=count * 3)
+                pp.shape = (count, 3)
+                pp = pp.astype(np.float32)
+            regions.add_data_file(pp, data_file.file_id,
+                                  data_file.ds.filter_bbox)
+            morton = compute_morton(pp[:, 0], pp[:, 1], pp[:, 2], DLE, DRE,
+                                    data_file.ds.filter_bbox)
+            return morton
 
     def _count_particles(self, data_file):
         npart = dict((self._ptypes[i], v)
-            for i, v in enumerate(data_file.header["Npart"]))
+                     for i, v in enumerate(data_file.header["Npart"]))
         return npart
 
     # header is 256, but we have 4 at beginning and end for ints
     _field_size = 4
+
     def _calculate_field_offsets(self, field_list, pcount,
-                                 offset, file_size = None):
+                                 offset, file_size=None):
         # field_list is (ftype, fname) but the blocks are ordered
         # (fname, ftype) in the file.
-        pos = offset
+        if self._format == 2:
+            pos = offset - 16  # already added in data_structures: L76
+        else:
+            pos = offset
         fs = self._field_size
         offsets = {}
+
         for field in self._fields:
             if not isinstance(field, string_types):
                 field = field[0]
-            if not any( (ptype, field) in field_list
-                        for ptype in self._ptypes):
+            if not any((ptype, field) in field_list
+                       for ptype in self._ptypes):
                 continue
             if self._format == 2:
-                pos += 20 #skip block header
+                pos += 20  # skip block header
             elif self._format == 1:
                 pos += 4
             else:
-                raise RuntimeError("incorrect Gadget format %s!" % str(self._format))
+                raise RuntimeError(
+                    "incorrect Gadget format %s!" % str(self._format))
             any_ptypes = False
             for ptype in self._ptypes:
                 if field == "Mass" and ptype not in self.var_mass:
@@ -369,9 +409,10 @@
                 else:
                     pos += pcount[ptype] * fs
             pos += 4
-            if not any_ptypes: pos -= 8
+            if not any_ptypes:
+                pos -= 8
         if file_size is not None:
-            if (file_size != pos) & (self._format == 1): #ignore the rest of format 2 
+            if (file_size != pos) & (self._format == 1):  # ignore the rest of format 2
                 mylog.warning("Your Gadget-2 file may have extra " +
                               "columns or different precision!" +
                               " (%s file vs %s computed)",
@@ -384,13 +425,15 @@
         tp = domain.total_particles
         for i, ptype in enumerate(self._ptypes):
             count = tp[ptype]
-            if count == 0: continue
+            if count == 0:
+                continue
             m = domain.header["Massarr"][i]
             for field in self._fields:
                 if isinstance(field, tuple):
                     field, req = field
                     if req is ZeroMass:
-                        if m > 0.0 : continue
+                        if m > 0.0:
+                            continue
                     elif isinstance(req, tuple) and ptype in req:
                         pass
                     elif req != ptype:

diff -r 3eca2ae80ab14a48b643d3055d7d3c0933fa77ae -r 52399a9e8d85d9200d001d3492d11c2ee421465a yt/frontends/gadget/tests/test_outputs.py
--- a/yt/frontends/gadget/tests/test_outputs.py
+++ b/yt/frontends/gadget/tests/test_outputs.py
@@ -25,6 +25,7 @@
 
 isothermal_h5 = "IsothermalCollapse/snap_505.hdf5"
 isothermal_bin = "IsothermalCollapse/snap_505"
+BE_Gadget = "BigEndianGadgetBinary/BigEndianGadgetBinary"
 
 # This maps from field names to weight field names to use for projections
 iso_fields = OrderedDict(
@@ -41,13 +42,17 @@
 )
 iso_kwargs = dict(bounding_box=[[-3, 3], [-3, 3], [-3, 3]])
 
+
 @requires_file(isothermal_h5)
 @requires_file(isothermal_bin)
+ at requires_file(BE_Gadget)
 def test_GadgetDataset():
     assert isinstance(data_dir_load(isothermal_h5, kwargs=iso_kwargs),
                       GadgetHDF5Dataset)
     assert isinstance(data_dir_load(isothermal_bin, kwargs=iso_kwargs),
                       GadgetDataset)
+    assert isinstance(data_dir_load(BE_Gadget, kwargs=''),
+                      GadgetDataset)
 
 
 @requires_ds(isothermal_h5)


https://bitbucket.org/yt_analysis/yt/commits/b51d512f9f56/
Changeset:   b51d512f9f56
User:        WeiguangCui
Date:        2017-05-01 18:12:38+00:00
Summary:     Backporting PR 2560 from Bitbucket
Affected #:  1 file

diff -r 52399a9e8d85d9200d001d3492d11c2ee421465a -r b51d512f9f56b552480fdc57769e40062d32f073 doc/source/examining/loading_data.rst
--- a/doc/source/examining/loading_data.rst
+++ b/doc/source/examining/loading_data.rst
@@ -960,8 +960,10 @@
    ds = yt.load("snapshot_061.hdf5", index_ptype="PartType0")
 
 By default, ``index_ptype`` is set to ``"all"``, which means all the particles.
-Currently this feature only works for the Gadget HDF5 and OWLS datasets. To
-bring the feature to other frontends, it's recommended to refer to this
+Currently this feature only works for the Gadget HDF5 and OWLS datasets.
+For binary Gadget dataset, the ``index_ptype`` should be set according to the default
+``gadget_ptype_specs``, i.e., ``"Gas"``.
+To bring the feature to other frontends, it's recommended to refer to this
 `PR <https://bitbucket.org/yt_analysis/yt/pull-requests/1985/add-particle-type-aware-octree/diff>`_
 for implementation details.
 


https://bitbucket.org/yt_analysis/yt/commits/7ac29c0f94cc/
Changeset:   7ac29c0f94cc
User:        ngoldbaum
Date:        2017-05-01 18:19:22+00:00
Summary:     Merging and fixing conflicts
Affected #:  4 files

diff -r 09d6e114abd2fed4d527359f6c9ed87a59a1e1ac -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c doc/source/examining/loading_data.rst
--- a/doc/source/examining/loading_data.rst
+++ b/doc/source/examining/loading_data.rst
@@ -960,10 +960,10 @@
    ds = yt.load("snapshot_061.hdf5", index_ptype="PartType0")
 
 By default, ``index_ptype`` is set to ``"all"``, which means all the particles.
-Currently this feature only works for the Gadget HDF5 and OWLS datasets. To
-bring the feature to other frontends, it's recommended to refer to this
-`PR <https://bitbucket.org/yt_analysis/yt/pull-requests/1985/add-particle-type-aware-octree/diff>`_
-for implementation details.
+For Gadget binary outputs, ``index_ptype`` should be set using the particle type
+names yt uses internally (e.g. ``'Gas'``, ``'Halo'``, ``'Disk'``, etc). For
+Gadget HDF5 outputs the particle type names come from the HDF5 output and so
+should be referred to using names like ``'PartType0'``.
 
 .. _gadget-field-spec:
 

diff -r 09d6e114abd2fed4d527359f6c9ed87a59a1e1ac -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c yt/frontends/gadget/data_structures.py
--- a/yt/frontends/gadget/data_structures.py
+++ b/yt/frontends/gadget/data_structures.py
@@ -42,29 +42,39 @@
 from .fields import \
     GadgetFieldInfo
 
+
 def _fix_unit_ordering(unit):
     if isinstance(unit[0], string_types):
         unit = unit[1], unit[0]
     return unit
-    
+
+
 def _get_gadget_format(filename):
-    # check and return gadget binary format
-    f = open(filename, 'rb')
-    (rhead,) = struct.unpack('<I',f.read(4))
-    f.close()
-    if (rhead == 134217728) | (rhead == 8):
-        return 2
-    elif (rhead == 65536) | (rhead == 256):
-        return 1
+    # check and return gadget binary format with file endianness
+    ff = open(filename, 'rb')
+    (rhead,) = struct.unpack('<I', ff.read(4))
+    ff.close()
+    if (rhead == 134217728):
+        return 2, '>'
+    elif (rhead == 8):
+        return 2, '<'
+    elif (rhead == 65536):
+        return 1, '>'
+    elif (rhead == 256):
+        return 1, '<'
     else:
         raise RuntimeError("Incorrect Gadget format %s!" % str(rhead))
 
+
 class GadgetBinaryFile(ParticleFile):
     def __init__(self, ds, io, filename, file_id):
+        gformat = _get_gadget_format(filename)
         with open(filename, "rb") as f:
-            if _get_gadget_format(filename) == 2:
-                f.seek(f.tell()+16)
-            self.header = read_record(f, ds._header_spec)
+            if gformat[0] == 2:
+                f.seek(f.tell() + 16)
+            self.header = read_record(f, ds._header_spec, endian=gformat[1])
+            if gformat[0] == 2:
+                f.seek(f.tell() + 16)
             self._position_offset = f.tell()
             f.seek(0, os.SEEK_END)
             self._file_size = f.tell()
@@ -76,6 +86,7 @@
             field_list, self.total_particles,
             self._position_offset, self._file_size)
 
+
 class GadgetDataset(SPHDataset):
     _index_class = ParticleIndex
     _file_class = GadgetBinaryFile
@@ -91,16 +102,17 @@
                  over_refine_factor=1,
                  kernel_name=None,
                  index_ptype="all",
-                 bounding_box = None,
-                 header_spec = "default",
-                 field_spec = "default",
-                 ptype_spec = "default",
+                 bounding_box=None,
+                 header_spec="default",
+                 field_spec="default",
+                 ptype_spec="default",
                  units_override=None,
                  unit_system="cgs",
                  use_dark_factor = False,
                  w_0 = -1.0,
                  w_a = 0.0):
-        if self._instantiated: return
+        if self._instantiated:
+            return
         self._header_spec = self._setup_binary_spec(
             header_spec, gadget_header_specs)
         self._field_spec = self._setup_binary_spec(
@@ -118,12 +130,12 @@
             bbox = np.array(bounding_box, dtype="float64")
             if bbox.shape == (2, 3):
                 bbox = bbox.transpose()
-            self.domain_left_edge = bbox[:,0]
-            self.domain_right_edge = bbox[:,1]
+            self.domain_left_edge = bbox[:, 0]
+            self.domain_right_edge = bbox[:, 1]
         else:
             self.domain_left_edge = self.domain_right_edge = None
         if units_override is not None:
-            raise RuntimeError("units_override is not supported for GadgetDataset. "+
+            raise RuntimeError("units_override is not supported for GadgetDataset. " +
                                "Use unit_base instead.")
 
         # Set dark energy parameters before cosmology object is created
@@ -158,11 +170,11 @@
     def _get_hvals(self):
         # The entries in this header are capitalized and named to match Table 4
         # in the GADGET-2 user guide.
-
+        gformat = _get_gadget_format(self.parameter_filename)
         f = open(self.parameter_filename, 'rb')
-        if _get_gadget_format(self.parameter_filename) == 2:
-            f.seek(f.tell()+16)
-        hvals = read_record(f, self._header_spec)
+        if gformat[0] == 2:
+            f.seek(f.tell() + 16)
+        hvals = read_record(f, self._header_spec, endian=gformat[1])
         for i in hvals:
             if len(hvals[i]) == 1:
                 hvals[i] = hvals[i][0]
@@ -200,7 +212,8 @@
         # It may be possible to deduce whether ComovingIntegration is on
         # somehow, but opinions on this vary.
         if self.omega_lambda == 0.0:
-            only_on_root(mylog.info, "Omega Lambda is 0.0, so we are turning off Cosmology.")
+            only_on_root(
+                mylog.info, "Omega Lambda is 0.0, so we are turning off Cosmology.")
             self.hubble_constant = 1.0  # So that scaling comes out correct
             self.cosmological_simulation = 0
             self.current_redshift = 0.0
@@ -230,14 +243,17 @@
         self.file_count = hvals["NumFiles"]
 
     def _set_code_unit_attributes(self):
-        # If no units passed in by user, set a sane default (Gadget-2 users guide).
+        # If no units passed in by user, set a sane default (Gadget-2 users
+        # guide).
         if self._unit_base is None:
             if self.cosmological_simulation == 1:
-                only_on_root(mylog.info, "Assuming length units are in kpc/h (comoving)")
-                self._unit_base = dict(length = (1.0, "kpccm/h"))
+                only_on_root(
+                    mylog.info, "Assuming length units are in kpc/h (comoving)")
+                self._unit_base = dict(length=(1.0, "kpccm/h"))
             else:
-                only_on_root(mylog.info, "Assuming length units are in kpc (physical)")
-                self._unit_base = dict(length = (1.0, "kpc"))
+                only_on_root(
+                    mylog.info, "Assuming length units are in kpc (physical)")
+                self._unit_base = dict(length=(1.0, "kpc"))
 
         # If units passed in by user, decide what to do about
         # co-moving and factors of h
@@ -314,10 +330,10 @@
         is a Gadget binary file, and endianswap is the endianness character '>' or '<'.
         '''
         try:
-            f = open(filename,'rb')
+            f = open(filename, 'rb')
         except IOError:
             try:
-                f = open(filename+".0")
+                f = open(filename + ".0")
             except IOError:
                 return False, 1
 
@@ -327,7 +343,7 @@
         # The int32 following the header (first 4+256 bytes) must equal this
         # number.
         try:
-            (rhead,) = struct.unpack('<I',f.read(4))
+            (rhead,) = struct.unpack('<I', f.read(4))
         except struct.error:
             f.close()
             return False, 1
@@ -347,11 +363,11 @@
             f.close()
             return False, 1
         # Read in particle number from header
-        np0 = sum(struct.unpack(endianswap+'IIIIII',f.read(6*4)))
+        np0 = sum(struct.unpack(endianswap + 'IIIIII', f.read(6 * 4)))
         # Read in size of position block. It should be 4 bytes per float,
         # with 3 coordinates (x,y,z) per particle. (12 bytes per particle)
-        f.seek(4+256+4,0)
-        np1 = struct.unpack(endianswap+'I',f.read(4))[0]/(4*3)
+        f.seek(4 + 256 + 4, 0)
+        np1 = struct.unpack(endianswap + 'I', f.read(4))[0] / (4 * 3)
         f.close()
         # Compare
         if np0 == np1:
@@ -366,6 +382,7 @@
         # First 4 bytes used to check load
         return GadgetDataset._validate_header(args[0])[0]
 
+
 class GadgetHDF5Dataset(GadgetDataset):
     _file_class = ParticleFile
     _field_info_class = GadgetFieldInfo
@@ -373,17 +390,17 @@
     _suffix = ".hdf5"
 
     def __init__(self, filename, dataset_type="gadget_hdf5",
-                 unit_base = None, n_ref=64,
+                 unit_base=None, n_ref=64,
                  over_refine_factor=1,
                  kernel_name=None,
                  index_ptype="all",
-                 bounding_box = None,
+                 bounding_box=None,
                  units_override=None,
                  unit_system="cgs"):
         self.storage_filename = None
         filename = os.path.abspath(filename)
         if units_override is not None:
-            raise RuntimeError("units_override is not supported for GadgetHDF5Dataset. "+
+            raise RuntimeError("units_override is not supported for GadgetHDF5Dataset. " +
                                "Use unit_base instead.")
         super(GadgetHDF5Dataset, self).__init__(
             filename, dataset_type, unit_base=unit_base, n_ref=n_ref,
@@ -408,8 +425,6 @@
         handle.close()
         return uvals
 
-
-
     def _set_owls_eagle(self):
 
         self.dimensionality = 3
@@ -428,7 +443,8 @@
 
         if self.domain_left_edge is None:
             self.domain_left_edge = np.zeros(3, "float64")
-            self.domain_right_edge = np.ones(3, "float64") * self.parameters["BoxSize"]
+            self.domain_right_edge = np.ones(
+                3, "float64") * self.parameters["BoxSize"]
 
         nz = 1 << self.over_refine_factor
         self.domain_dimensions = np.ones(3, "int32") * nz
@@ -452,9 +468,11 @@
 
         # note the contents of the HDF5 Units group are in _unit_base
         # note the velocity stored on disk is sqrt(a) dx/dt
-        self.length_unit = self.quan(self._unit_base["UnitLength_in_cm"], 'cmcm/h')
+        self.length_unit = self.quan(
+            self._unit_base["UnitLength_in_cm"], 'cmcm/h')
         self.mass_unit = self.quan(self._unit_base["UnitMass_in_g"], 'g/h')
-        self.velocity_unit = self.quan(self._unit_base["UnitVelocity_in_cm_per_s"], 'cm/s')
+        self.velocity_unit = self.quan(
+            self._unit_base["UnitVelocity_in_cm_per_s"], 'cm/s')
         self.time_unit = self.quan(self._unit_base["UnitTime_in_s"], 's/h')
 
     @classmethod
@@ -465,7 +483,7 @@
         try:
             fh = h5py.File(args[0], mode='r')
             valid = all(ng in fh["/"] for ng in need_groups) and \
-              not any(vg in fh["/"] for vg in veto_groups)
+                not any(vg in fh["/"] for vg in veto_groups)
             fh.close()
         except:
             valid = False

diff -r 09d6e114abd2fed4d527359f6c9ed87a59a1e1ac -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c yt/frontends/gadget/io.py
--- a/yt/frontends/gadget/io.py
+++ b/yt/frontends/gadget/io.py
@@ -39,8 +39,7 @@
     _known_ptypes = gadget_hdf5_ptypes
     _var_mass = None
     _element_names = ('Hydrogen', 'Helium', 'Carbon', 'Nitrogen', 'Oxygen',
-                       'Neon', 'Magnesium', 'Silicon', 'Iron' )
-
+                      'Neon', 'Magnesium', 'Silicon', 'Iron')
 
     @property
     def var_mass(self):
@@ -68,9 +67,9 @@
             for ptype, field_list in sorted(ptf.items()):
                 if data_file.total_particles[ptype] == 0:
                     continue
-                x = f["/%s/Coordinates" % ptype][:,0].astype("float64")
-                y = f["/%s/Coordinates" % ptype][:,1].astype("float64")
-                z = f["/%s/Coordinates" % ptype][:,2].astype("float64")
+                x = f["/%s/Coordinates" % ptype][:, 0].astype("float64")
+                y = f["/%s/Coordinates" % ptype][:, 1].astype("float64")
+                z = f["/%s/Coordinates" % ptype][:, 2].astype("float64")
                 yield ptype, (x, y, z)
             f.close()
 
@@ -88,28 +87,29 @@
                 g = f["/%s" % ptype]
                 coords = g["Coordinates"][:].astype("float64")
                 mask = selector.select_points(
-                            coords[:,0], coords[:,1], coords[:,2], 0.0)
+                    coords[:, 0], coords[:, 1], coords[:, 2], 0.0)
                 del coords
-                if mask is None: continue
+                if mask is None:
+                    continue
                 for field in field_list:
 
                     if field in ("Mass", "Masses") and \
-                        ptype not in self.var_mass:
+                            ptype not in self.var_mass:
                         data = np.empty(mask.sum(), dtype="float64")
                         ind = self._known_ptypes.index(ptype)
                         data[:] = self.ds["Massarr"][ind]
 
                     elif field in self._element_names:
                         rfield = 'ElementAbundance/' + field
-                        data = g[rfield][:][mask,...]
+                        data = g[rfield][:][mask, ...]
                     elif field.startswith("Metallicity_"):
                         col = int(field.rsplit("_", 1)[-1])
-                        data = g["Metallicity"][:,col][mask]
+                        data = g["Metallicity"][:, col][mask]
                     elif field.startswith("Chemistry_"):
                         col = int(field.rsplit("_", 1)[-1])
-                        data = g["ChemistryAbundances"][:,col][mask]
+                        data = g["ChemistryAbundances"][:, col][mask]
                     else:
-                        data = g[field][:][mask,...]
+                        data = g[field][:][mask, ...]
 
                     yield (ptype, field), data
             f.close()
@@ -127,16 +127,18 @@
         morton = np.empty(pcount, dtype='uint64')
         ind = 0
         for key in keys:
-            if not key.startswith("PartType"): continue
-            if "Coordinates" not in f[key]: continue
+            if not key.startswith("PartType"):
+                continue
+            if "Coordinates" not in f[key]:
+                continue
             ds = f[key]["Coordinates"]
-            dt = ds.dtype.newbyteorder("N") # Native
+            dt = ds.dtype.newbyteorder("N")  # Native
             pos = np.empty(ds.shape, dtype=dt)
             pos[:] = ds
             regions.add_data_file(pos, data_file.file_id,
                                   data_file.ds.filter_bbox)
-            morton[ind:ind+pos.shape[0]] = compute_morton(
-                pos[:,0], pos[:,1], pos[:,2],
+            morton[ind:ind + pos.shape[0]] = compute_morton(
+                pos[:, 0], pos[:, 1], pos[:, 2],
                 data_file.ds.domain_left_edge,
                 data_file.ds.domain_right_edge,
                 data_file.ds.filter_bbox)
@@ -151,7 +153,6 @@
         npart = dict(("PartType%s" % (i), v) for i, v in enumerate(pcount))
         return npart
 
-
     def _identify_fields(self, data_file):
         f = h5py.File(data_file.filename, "r")
         fields = []
@@ -205,8 +206,10 @@
         f.close()
         return fields, {}
 
+
 ZeroMass = object()
 
+
 class IOHandlerGadgetBinary(BaseIOHandler):
     _dataset_type = "gadget_binary"
     _vector_fields = (("Coordinates", 3),
@@ -231,13 +234,17 @@
     #   TSTP    (only if enabled in makefile)
 
     _var_mass = None
+    _format = None
 
     def __init__(self, ds, *args, **kwargs):
         self._vector_fields = dict(self._vector_fields)
         self._fields = ds._field_spec
         self._ptypes = ds._ptype_spec
         self.data_files = set([])
-        self._format =  _get_gadget_format(ds.parameter_filename)#default gadget format 1
+        gformat = _get_gadget_format(ds.parameter_filename)
+        # gadget format 1 original, 2 with block name
+        self._format = gformat[0]
+        self._endian = gformat[1]
         super(IOHandlerGadgetBinary, self).__init__(ds, *args, **kwargs)
 
     @property
@@ -266,8 +273,8 @@
                 # This is where we could implement sub-chunking
                 f.seek(poff[ptype, "Coordinates"], os.SEEK_SET)
                 pos = self._read_field_from_file(f,
-                            tp[ptype], "Coordinates")
-                yield ptype, (pos[:,0], pos[:,1], pos[:,2])
+                                                 tp[ptype], "Coordinates")
+                yield ptype, (pos[:, 0], pos[:, 1], pos[:, 2])
             f.close()
 
     def _read_particle_fields(self, chunks, ptf, selector):
@@ -282,11 +289,12 @@
             for ptype, field_list in sorted(ptf.items()):
                 f.seek(poff[ptype, "Coordinates"], os.SEEK_SET)
                 pos = self._read_field_from_file(f,
-                            tp[ptype], "Coordinates")
+                                                 tp[ptype], "Coordinates")
                 mask = selector.select_points(
-                    pos[:,0], pos[:,1], pos[:,2], 0.0)
+                    pos[:, 0], pos[:, 1], pos[:, 2], 0.0)
                 del pos
-                if mask is None: continue
+                if mask is None:
+                    continue
                 for field in field_list:
                     if field == "Mass" and ptype not in self.var_mass:
                         data = np.empty(mask.sum(), dtype="float64")
@@ -297,66 +305,88 @@
                         continue
                     f.seek(poff[ptype, field], os.SEEK_SET)
                     data = self._read_field_from_file(f, tp[ptype], field)
-                    data = data[mask,...]
+                    data = data[mask, ...]
                     yield (ptype, field), data
             f.close()
 
     def _read_field_from_file(self, f, count, name):
-        if count == 0: return
+        if count == 0:
+            return
         if name == "ParticleIDs":
-            dt = "uint32"
+            dt = self._endian + "u4"
         else:
-            dt = self._float_type
+            dt = self._endian + self._float_type
         if name in self._vector_fields:
             count *= self._vector_fields[name]
-        arr = np.fromfile(f, dtype=dt, count = count)
+        arr = np.fromfile(f, dtype=dt, count=count)
         if name in self._vector_fields:
             factor = self._vector_fields[name]
-            arr = arr.reshape((count//factor, factor), order="C")
-        return arr.astype("float64")
+            arr = arr.reshape((count // factor, factor), order="C")
+        return arr.astype(self._float_type)
 
-    def _initialize_index(self, data_file, regions):
-        count = sum(data_file.total_particles.values())
-        DLE = data_file.ds.domain_left_edge
-        DRE = data_file.ds.domain_right_edge
-        self._float_type = data_file.ds._validate_header(data_file.filename)[1]
-        self._field_size = np.dtype(self._float_type).itemsize
+    def get_morton_from_position(self, data_file, count, offset_count,
+                                 regions, DLE, DRE):
         with open(data_file.filename, "rb") as f:
             # We add on an additionally 4 for the first record.
-            f.seek(data_file._position_offset + 4)
+            f.seek(data_file._position_offset + 4 + offset_count * 12)
             # The first total_particles * 3 values are positions
-            pp = np.fromfile(f, dtype=self._float_type, count=count*3)
+            pp = np.fromfile(f, dtype=self._endian + self._float_type,
+                             count=count * 3)
             pp.shape = (count, 3)
-        regions.add_data_file(pp, data_file.file_id, data_file.ds.filter_bbox)
-        morton = compute_morton(pp[:,0], pp[:,1], pp[:,2], DLE, DRE,
+            pp = pp.astype(np._float_type)
+        regions.add_data_file(pp, data_file.file_id,
+                                  data_file.ds.filter_bbox)
+        morton = compute_morton(pp[:, 0], pp[:, 1], pp[:, 2], DLE, DRE,
                                 data_file.ds.filter_bbox)
         return morton
 
+    def _initialize_index(self, data_file, regions):
+        DLE = data_file.ds.domain_left_edge
+        DRE = data_file.ds.domain_right_edge
+        if self.index_ptype == "all":
+            count = sum(data_file.total_particles.values())
+            return self.get_morton_from_position(
+                data_file, count, 0, regions, DLE, DRE)
+        else:
+            idpos = self._ptypes.index(self.index_ptype)
+            count = data_file.total_particles.get(self.index_ptype)
+            account = [0] + [data_file.total_particles.get(ptype)
+                             for ptype in self._ptypes]
+            account = np.cumsum(account)
+            return self.get_morton_from_position(
+                data_file, account, account[idpos], regions, DLE, DRE)
+
     def _count_particles(self, data_file):
         npart = dict((self._ptypes[i], v)
-            for i, v in enumerate(data_file.header["Npart"]))
+                     for i, v in enumerate(data_file.header["Npart"]))
         return npart
 
     # header is 256, but we have 4 at beginning and end for ints
+    _field_size = 4
     def _calculate_field_offsets(self, field_list, pcount,
-                                 offset, file_size = None):
+                                 offset, file_size=None):
         # field_list is (ftype, fname) but the blocks are ordered
         # (fname, ftype) in the file.
-        pos = offset
+        if self._format == 2:
+            pos = offset - 16  # already added in data_structures: L76
+        else:
+            pos = offset
         fs = self._field_size
         offsets = {}
+
         for field in self._fields:
             if not isinstance(field, string_types):
                 field = field[0]
-            if not any( (ptype, field) in field_list
-                        for ptype in self._ptypes):
+            if not any((ptype, field) in field_list
+                       for ptype in self._ptypes):
                 continue
             if self._format == 2:
-                pos += 20 #skip block header
+                pos += 20  # skip block header
             elif self._format == 1:
                 pos += 4
             else:
-                raise RuntimeError("incorrect Gadget format %s!" % str(self._format))
+                raise RuntimeError(
+                    "incorrect Gadget format %s!" % str(self._format))
             any_ptypes = False
             for ptype in self._ptypes:
                 if field == "Mass" and ptype not in self.var_mass:
@@ -370,9 +400,10 @@
                 else:
                     pos += pcount[ptype] * fs
             pos += 4
-            if not any_ptypes: pos -= 8
+            if not any_ptypes:
+                pos -= 8
         if file_size is not None:
-            if (file_size != pos) & (self._format == 1): #ignore the rest of format 2 
+            if (file_size != pos) & (self._format == 1):  # ignore the rest of format 2
                 mylog.warning("Your Gadget-2 file may have extra " +
                               "columns or different precision!" +
                               " (%s file vs %s computed)",
@@ -385,13 +416,15 @@
         tp = domain.total_particles
         for i, ptype in enumerate(self._ptypes):
             count = tp[ptype]
-            if count == 0: continue
+            if count == 0:
+                continue
             m = domain.header["Massarr"][i]
             for field in self._fields:
                 if isinstance(field, tuple):
                     field, req = field
                     if req is ZeroMass:
-                        if m > 0.0 : continue
+                        if m > 0.0:
+                            continue
                     elif isinstance(req, tuple) and ptype in req:
                         pass
                     elif req != ptype:

diff -r 09d6e114abd2fed4d527359f6c9ed87a59a1e1ac -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c yt/frontends/gadget/tests/test_outputs.py
--- a/yt/frontends/gadget/tests/test_outputs.py
+++ b/yt/frontends/gadget/tests/test_outputs.py
@@ -25,6 +25,7 @@
 
 isothermal_h5 = "IsothermalCollapse/snap_505.hdf5"
 isothermal_bin = "IsothermalCollapse/snap_505"
+BE_Gadget = "BigEndianGadgetBinary/BigEndianGadgetBinary"
 
 # This maps from field names to weight field names to use for projections
 iso_fields = OrderedDict(
@@ -41,13 +42,17 @@
 )
 iso_kwargs = dict(bounding_box=[[-3, 3], [-3, 3], [-3, 3]])
 
+
 @requires_file(isothermal_h5)
 @requires_file(isothermal_bin)
+ at requires_file(BE_Gadget)
 def test_GadgetDataset():
     assert isinstance(data_dir_load(isothermal_h5, kwargs=iso_kwargs),
                       GadgetHDF5Dataset)
     assert isinstance(data_dir_load(isothermal_bin, kwargs=iso_kwargs),
                       GadgetDataset)
+    assert isinstance(data_dir_load(BE_Gadget, kwargs=''),
+                      GadgetDataset)
 
 
 @requires_ds(isothermal_h5)


https://bitbucket.org/yt_analysis/yt/commits/232eb1a37e14/
Changeset:   232eb1a37e14
User:        ngoldbaum
Date:        2017-05-02 21:06:54+00:00
Summary:     fix I/O for big endian gadget binary data
Affected #:  3 files

diff -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c -r 232eb1a37e146647ccb9a226d622b84745268416 yt/frontends/gadget/data_structures.py
--- a/yt/frontends/gadget/data_structures.py
+++ b/yt/frontends/gadget/data_structures.py
@@ -37,7 +37,8 @@
 from .definitions import \
     gadget_header_specs, \
     gadget_field_specs, \
-    gadget_ptype_specs
+    gadget_ptype_specs, \
+    SNAP_FORMAT_2_OFFSET
 
 from .fields import \
     GadgetFieldInfo
@@ -71,10 +72,10 @@
         gformat = _get_gadget_format(filename)
         with open(filename, "rb") as f:
             if gformat[0] == 2:
-                f.seek(f.tell() + 16)
+                f.seek(f.tell() + SNAP_FORMAT_2_OFFSET)
             self.header = read_record(f, ds._header_spec, endian=gformat[1])
             if gformat[0] == 2:
-                f.seek(f.tell() + 16)
+                f.seek(f.tell() + SNAP_FORMAT_2_OFFSET)
             self._position_offset = f.tell()
             f.seek(0, os.SEEK_END)
             self._file_size = f.tell()
@@ -173,7 +174,7 @@
         gformat = _get_gadget_format(self.parameter_filename)
         f = open(self.parameter_filename, 'rb')
         if gformat[0] == 2:
-            f.seek(f.tell() + 16)
+            f.seek(f.tell() + SNAP_FORMAT_2_OFFSET)
         hvals = read_record(f, self._header_spec, endian=gformat[1])
         for i in hvals:
             if len(hvals[i]) == 1:
@@ -355,10 +356,10 @@
         # Enabled Format2 here
         elif rhead == 8:
             f.close()
-            return True, 'float32'
+            return True, 'f8'
         elif rhead == 134217728:
             f.close()
-            return True, 'float32'
+            return True, 'f4'
         else:
             f.close()
             return False, 1
@@ -371,9 +372,9 @@
         f.close()
         # Compare
         if np0 == np1:
-            return True, 'float32'
+            return True, 'f4'
         elif np1 == 2*np0:
-            return True, 'float64'
+            return True, 'f8'
         else:
             return False, 1
 

diff -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c -r 232eb1a37e146647ccb9a226d622b84745268416 yt/frontends/gadget/definitions.py
--- a/yt/frontends/gadget/definitions.py
+++ b/yt/frontends/gadget/definitions.py
@@ -93,3 +93,5 @@
     "PartType4",
     "PartType5"
 )
+
+SNAP_FORMAT_2_OFFSET = 16

diff -r 7ac29c0f94cc7ff546c0783c4cfd7e355341fe3c -r 232eb1a37e146647ccb9a226d622b84745268416 yt/frontends/gadget/io.py
--- a/yt/frontends/gadget/io.py
+++ b/yt/frontends/gadget/io.py
@@ -30,7 +30,8 @@
     _get_gadget_format
 
 from .definitions import \
-    gadget_hdf5_ptypes
+    gadget_hdf5_ptypes, \
+    SNAP_FORMAT_2_OFFSET
 
 
 class IOHandlerGadgetHDF5(BaseIOHandler):
@@ -324,8 +325,8 @@
             arr = arr.reshape((count // factor, factor), order="C")
         return arr.astype(self._float_type)
 
-    def get_morton_from_position(self, data_file, count, offset_count,
-                                 regions, DLE, DRE):
+    def _get_morton_from_position(self, data_file, count, offset_count,
+                                  regions, DLE, DRE):
         with open(data_file.filename, "rb") as f:
             # We add on an additionally 4 for the first record.
             f.seek(data_file._position_offset + 4 + offset_count * 12)
@@ -333,7 +334,7 @@
             pp = np.fromfile(f, dtype=self._endian + self._float_type,
                              count=count * 3)
             pp.shape = (count, 3)
-            pp = pp.astype(np._float_type)
+            pp = pp.astype(self._float_type)
         regions.add_data_file(pp, data_file.file_id,
                                   data_file.ds.filter_bbox)
         morton = compute_morton(pp[:, 0], pp[:, 1], pp[:, 2], DLE, DRE,
@@ -343,9 +344,10 @@
     def _initialize_index(self, data_file, regions):
         DLE = data_file.ds.domain_left_edge
         DRE = data_file.ds.domain_right_edge
+        self._float_type = data_file.ds._validate_header(data_file.filename)[1]
         if self.index_ptype == "all":
             count = sum(data_file.total_particles.values())
-            return self.get_morton_from_position(
+            return self._get_morton_from_position(
                 data_file, count, 0, regions, DLE, DRE)
         else:
             idpos = self._ptypes.index(self.index_ptype)
@@ -353,7 +355,7 @@
             account = [0] + [data_file.total_particles.get(ptype)
                              for ptype in self._ptypes]
             account = np.cumsum(account)
-            return self.get_morton_from_position(
+            return self._get_morton_from_position(
                 data_file, account, account[idpos], regions, DLE, DRE)
 
     def _count_particles(self, data_file):
@@ -368,7 +370,8 @@
         # field_list is (ftype, fname) but the blocks are ordered
         # (fname, ftype) in the file.
         if self._format == 2:
-            pos = offset - 16  # already added in data_structures: L76
+            # Need to subtract offset due to extra header block
+            pos = offset - SNAP_FORMAT_2_OFFSET
         else:
             pos = offset
         fs = self._field_size


https://bitbucket.org/yt_analysis/yt/commits/2cda15e1ef4d/
Changeset:   2cda15e1ef4d
User:        ngoldbaum
Date:        2017-05-16 20:24:23+00:00
Summary:     Merge pull request #1355 from Xarthisius/weiguang_merge

Correctly read in big endian gadget data
Affected #:  5 files

diff -r e78cd10a6d5cb98eeba67425c7f984bd9c72bf9d -r 2cda15e1ef4d6accc2c6f295789a87a747d43331 doc/source/examining/loading_data.rst
--- a/doc/source/examining/loading_data.rst
+++ b/doc/source/examining/loading_data.rst
@@ -1010,10 +1010,10 @@
    ds = yt.load("snapshot_061.hdf5", index_ptype="PartType0")
 
 By default, ``index_ptype`` is set to ``"all"``, which means all the particles.
-Currently this feature only works for the Gadget HDF5 and OWLS datasets. To
-bring the feature to other frontends, it's recommended to refer to this
-`PR <https://bitbucket.org/yt_analysis/yt/pull-requests/1985/add-particle-type-aware-octree/diff>`_
-for implementation details.
+For Gadget binary outputs, ``index_ptype`` should be set using the particle type
+names yt uses internally (e.g. ``'Gas'``, ``'Halo'``, ``'Disk'``, etc). For
+Gadget HDF5 outputs the particle type names come from the HDF5 output and so
+should be referred to using names like ``'PartType0'``.
 
 .. _gadget-field-spec:
 

diff -r e78cd10a6d5cb98eeba67425c7f984bd9c72bf9d -r 2cda15e1ef4d6accc2c6f295789a87a747d43331 yt/frontends/gadget/data_structures.py
--- a/yt/frontends/gadget/data_structures.py
+++ b/yt/frontends/gadget/data_structures.py
@@ -37,34 +37,45 @@
 from .definitions import \
     gadget_header_specs, \
     gadget_field_specs, \
-    gadget_ptype_specs
+    gadget_ptype_specs, \
+    SNAP_FORMAT_2_OFFSET
 
 from .fields import \
     GadgetFieldInfo
 
+
 def _fix_unit_ordering(unit):
     if isinstance(unit[0], string_types):
         unit = unit[1], unit[0]
     return unit
-    
+
+
 def _get_gadget_format(filename):
-    # check and return gadget binary format
-    f = open(filename, 'rb')
-    (rhead,) = struct.unpack('<I',f.read(4))
-    f.close()
-    if (rhead == 134217728) | (rhead == 8):
-        return 2
-    elif (rhead == 65536) | (rhead == 256):
-        return 1
+    # check and return gadget binary format with file endianness
+    ff = open(filename, 'rb')
+    (rhead,) = struct.unpack('<I', ff.read(4))
+    ff.close()
+    if (rhead == 134217728):
+        return 2, '>'
+    elif (rhead == 8):
+        return 2, '<'
+    elif (rhead == 65536):
+        return 1, '>'
+    elif (rhead == 256):
+        return 1, '<'
     else:
         raise RuntimeError("Incorrect Gadget format %s!" % str(rhead))
 
+
 class GadgetBinaryFile(ParticleFile):
     def __init__(self, ds, io, filename, file_id):
+        gformat = _get_gadget_format(filename)
         with open(filename, "rb") as f:
-            if _get_gadget_format(filename) == 2:
-                f.seek(f.tell()+16)
-            self.header = read_record(f, ds._header_spec)
+            if gformat[0] == 2:
+                f.seek(f.tell() + SNAP_FORMAT_2_OFFSET)
+            self.header = read_record(f, ds._header_spec, endian=gformat[1])
+            if gformat[0] == 2:
+                f.seek(f.tell() + SNAP_FORMAT_2_OFFSET)
             self._position_offset = f.tell()
             f.seek(0, os.SEEK_END)
             self._file_size = f.tell()
@@ -76,6 +87,7 @@
             field_list, self.total_particles,
             self._position_offset, self._file_size)
 
+
 class GadgetDataset(SPHDataset):
     _index_class = ParticleIndex
     _file_class = GadgetBinaryFile
@@ -91,16 +103,17 @@
                  over_refine_factor=1,
                  kernel_name=None,
                  index_ptype="all",
-                 bounding_box = None,
-                 header_spec = "default",
-                 field_spec = "default",
-                 ptype_spec = "default",
+                 bounding_box=None,
+                 header_spec="default",
+                 field_spec="default",
+                 ptype_spec="default",
                  units_override=None,
                  unit_system="cgs",
                  use_dark_factor = False,
                  w_0 = -1.0,
                  w_a = 0.0):
-        if self._instantiated: return
+        if self._instantiated:
+            return
         self._header_spec = self._setup_binary_spec(
             header_spec, gadget_header_specs)
         self._field_spec = self._setup_binary_spec(
@@ -118,12 +131,12 @@
             bbox = np.array(bounding_box, dtype="float64")
             if bbox.shape == (2, 3):
                 bbox = bbox.transpose()
-            self.domain_left_edge = bbox[:,0]
-            self.domain_right_edge = bbox[:,1]
+            self.domain_left_edge = bbox[:, 0]
+            self.domain_right_edge = bbox[:, 1]
         else:
             self.domain_left_edge = self.domain_right_edge = None
         if units_override is not None:
-            raise RuntimeError("units_override is not supported for GadgetDataset. "+
+            raise RuntimeError("units_override is not supported for GadgetDataset. " +
                                "Use unit_base instead.")
 
         # Set dark energy parameters before cosmology object is created
@@ -158,11 +171,11 @@
     def _get_hvals(self):
         # The entries in this header are capitalized and named to match Table 4
         # in the GADGET-2 user guide.
-
+        gformat = _get_gadget_format(self.parameter_filename)
         f = open(self.parameter_filename, 'rb')
-        if _get_gadget_format(self.parameter_filename) == 2:
-            f.seek(f.tell()+16)
-        hvals = read_record(f, self._header_spec)
+        if gformat[0] == 2:
+            f.seek(f.tell() + SNAP_FORMAT_2_OFFSET)
+        hvals = read_record(f, self._header_spec, endian=gformat[1])
         for i in hvals:
             if len(hvals[i]) == 1:
                 hvals[i] = hvals[i][0]
@@ -200,7 +213,8 @@
         # It may be possible to deduce whether ComovingIntegration is on
         # somehow, but opinions on this vary.
         if self.omega_lambda == 0.0:
-            only_on_root(mylog.info, "Omega Lambda is 0.0, so we are turning off Cosmology.")
+            only_on_root(
+                mylog.info, "Omega Lambda is 0.0, so we are turning off Cosmology.")
             self.hubble_constant = 1.0  # So that scaling comes out correct
             self.cosmological_simulation = 0
             self.current_redshift = 0.0
@@ -230,14 +244,17 @@
         self.file_count = hvals["NumFiles"]
 
     def _set_code_unit_attributes(self):
-        # If no units passed in by user, set a sane default (Gadget-2 users guide).
+        # If no units passed in by user, set a sane default (Gadget-2 users
+        # guide).
         if self._unit_base is None:
             if self.cosmological_simulation == 1:
-                only_on_root(mylog.info, "Assuming length units are in kpc/h (comoving)")
-                self._unit_base = dict(length = (1.0, "kpccm/h"))
+                only_on_root(
+                    mylog.info, "Assuming length units are in kpc/h (comoving)")
+                self._unit_base = dict(length=(1.0, "kpccm/h"))
             else:
-                only_on_root(mylog.info, "Assuming length units are in kpc (physical)")
-                self._unit_base = dict(length = (1.0, "kpc"))
+                only_on_root(
+                    mylog.info, "Assuming length units are in kpc (physical)")
+                self._unit_base = dict(length=(1.0, "kpc"))
 
         # If units passed in by user, decide what to do about
         # co-moving and factors of h
@@ -314,10 +331,10 @@
         is a Gadget binary file, and endianswap is the endianness character '>' or '<'.
         '''
         try:
-            f = open(filename,'rb')
+            f = open(filename, 'rb')
         except IOError:
             try:
-                f = open(filename+".0")
+                f = open(filename + ".0")
             except IOError:
                 return False, 1
 
@@ -327,7 +344,7 @@
         # The int32 following the header (first 4+256 bytes) must equal this
         # number.
         try:
-            (rhead,) = struct.unpack('<I',f.read(4))
+            (rhead,) = struct.unpack('<I', f.read(4))
         except struct.error:
             f.close()
             return False, 1
@@ -339,25 +356,25 @@
         # Enabled Format2 here
         elif rhead == 8:
             f.close()
-            return True, 'float32'
+            return True, 'f8'
         elif rhead == 134217728:
             f.close()
-            return True, 'float32'
+            return True, 'f4'
         else:
             f.close()
             return False, 1
         # Read in particle number from header
-        np0 = sum(struct.unpack(endianswap+'IIIIII',f.read(6*4)))
+        np0 = sum(struct.unpack(endianswap + 'IIIIII', f.read(6 * 4)))
         # Read in size of position block. It should be 4 bytes per float,
         # with 3 coordinates (x,y,z) per particle. (12 bytes per particle)
-        f.seek(4+256+4,0)
-        np1 = struct.unpack(endianswap+'I',f.read(4))[0]/(4*3)
+        f.seek(4 + 256 + 4, 0)
+        np1 = struct.unpack(endianswap + 'I', f.read(4))[0] / (4 * 3)
         f.close()
         # Compare
         if np0 == np1:
-            return True, 'float32'
+            return True, 'f4'
         elif np1 == 2*np0:
-            return True, 'float64'
+            return True, 'f8'
         else:
             return False, 1
 
@@ -366,6 +383,7 @@
         # First 4 bytes used to check load
         return GadgetDataset._validate_header(args[0])[0]
 
+
 class GadgetHDF5Dataset(GadgetDataset):
     _file_class = ParticleFile
     _field_info_class = GadgetFieldInfo
@@ -373,17 +391,17 @@
     _suffix = ".hdf5"
 
     def __init__(self, filename, dataset_type="gadget_hdf5",
-                 unit_base = None, n_ref=64,
+                 unit_base=None, n_ref=64,
                  over_refine_factor=1,
                  kernel_name=None,
                  index_ptype="all",
-                 bounding_box = None,
+                 bounding_box=None,
                  units_override=None,
                  unit_system="cgs"):
         self.storage_filename = None
         filename = os.path.abspath(filename)
         if units_override is not None:
-            raise RuntimeError("units_override is not supported for GadgetHDF5Dataset. "+
+            raise RuntimeError("units_override is not supported for GadgetHDF5Dataset. " +
                                "Use unit_base instead.")
         super(GadgetHDF5Dataset, self).__init__(
             filename, dataset_type, unit_base=unit_base, n_ref=n_ref,
@@ -408,8 +426,6 @@
         handle.close()
         return uvals
 
-
-
     def _set_owls_eagle(self):
 
         self.dimensionality = 3
@@ -428,7 +444,8 @@
 
         if self.domain_left_edge is None:
             self.domain_left_edge = np.zeros(3, "float64")
-            self.domain_right_edge = np.ones(3, "float64") * self.parameters["BoxSize"]
+            self.domain_right_edge = np.ones(
+                3, "float64") * self.parameters["BoxSize"]
 
         nz = 1 << self.over_refine_factor
         self.domain_dimensions = np.ones(3, "int32") * nz
@@ -452,9 +469,11 @@
 
         # note the contents of the HDF5 Units group are in _unit_base
         # note the velocity stored on disk is sqrt(a) dx/dt
-        self.length_unit = self.quan(self._unit_base["UnitLength_in_cm"], 'cmcm/h')
+        self.length_unit = self.quan(
+            self._unit_base["UnitLength_in_cm"], 'cmcm/h')
         self.mass_unit = self.quan(self._unit_base["UnitMass_in_g"], 'g/h')
-        self.velocity_unit = self.quan(self._unit_base["UnitVelocity_in_cm_per_s"], 'cm/s')
+        self.velocity_unit = self.quan(
+            self._unit_base["UnitVelocity_in_cm_per_s"], 'cm/s')
         self.time_unit = self.quan(self._unit_base["UnitTime_in_s"], 's/h')
 
     @classmethod
@@ -465,7 +484,7 @@
         try:
             fh = h5py.File(args[0], mode='r')
             valid = all(ng in fh["/"] for ng in need_groups) and \
-              not any(vg in fh["/"] for vg in veto_groups)
+                not any(vg in fh["/"] for vg in veto_groups)
             fh.close()
         except:
             valid = False

diff -r e78cd10a6d5cb98eeba67425c7f984bd9c72bf9d -r 2cda15e1ef4d6accc2c6f295789a87a747d43331 yt/frontends/gadget/definitions.py
--- a/yt/frontends/gadget/definitions.py
+++ b/yt/frontends/gadget/definitions.py
@@ -93,3 +93,5 @@
     "PartType4",
     "PartType5"
 )
+
+SNAP_FORMAT_2_OFFSET = 16

diff -r e78cd10a6d5cb98eeba67425c7f984bd9c72bf9d -r 2cda15e1ef4d6accc2c6f295789a87a747d43331 yt/frontends/gadget/io.py
--- a/yt/frontends/gadget/io.py
+++ b/yt/frontends/gadget/io.py
@@ -30,7 +30,8 @@
     _get_gadget_format
 
 from .definitions import \
-    gadget_hdf5_ptypes
+    gadget_hdf5_ptypes, \
+    SNAP_FORMAT_2_OFFSET
 
 
 class IOHandlerGadgetHDF5(BaseIOHandler):
@@ -39,8 +40,7 @@
     _known_ptypes = gadget_hdf5_ptypes
     _var_mass = None
     _element_names = ('Hydrogen', 'Helium', 'Carbon', 'Nitrogen', 'Oxygen',
-                       'Neon', 'Magnesium', 'Silicon', 'Iron' )
-
+                      'Neon', 'Magnesium', 'Silicon', 'Iron')
 
     @property
     def var_mass(self):
@@ -68,9 +68,9 @@
             for ptype, field_list in sorted(ptf.items()):
                 if data_file.total_particles[ptype] == 0:
                     continue
-                x = f["/%s/Coordinates" % ptype][:,0].astype("float64")
-                y = f["/%s/Coordinates" % ptype][:,1].astype("float64")
-                z = f["/%s/Coordinates" % ptype][:,2].astype("float64")
+                x = f["/%s/Coordinates" % ptype][:, 0].astype("float64")
+                y = f["/%s/Coordinates" % ptype][:, 1].astype("float64")
+                z = f["/%s/Coordinates" % ptype][:, 2].astype("float64")
                 yield ptype, (x, y, z)
             f.close()
 
@@ -88,28 +88,29 @@
                 g = f["/%s" % ptype]
                 coords = g["Coordinates"][:].astype("float64")
                 mask = selector.select_points(
-                            coords[:,0], coords[:,1], coords[:,2], 0.0)
+                    coords[:, 0], coords[:, 1], coords[:, 2], 0.0)
                 del coords
-                if mask is None: continue
+                if mask is None:
+                    continue
                 for field in field_list:
 
                     if field in ("Mass", "Masses") and \
-                        ptype not in self.var_mass:
+                            ptype not in self.var_mass:
                         data = np.empty(mask.sum(), dtype="float64")
                         ind = self._known_ptypes.index(ptype)
                         data[:] = self.ds["Massarr"][ind]
 
                     elif field in self._element_names:
                         rfield = 'ElementAbundance/' + field
-                        data = g[rfield][:][mask,...]
+                        data = g[rfield][:][mask, ...]
                     elif field.startswith("Metallicity_"):
                         col = int(field.rsplit("_", 1)[-1])
-                        data = g["Metallicity"][:,col][mask]
+                        data = g["Metallicity"][:, col][mask]
                     elif field.startswith("Chemistry_"):
                         col = int(field.rsplit("_", 1)[-1])
-                        data = g["ChemistryAbundances"][:,col][mask]
+                        data = g["ChemistryAbundances"][:, col][mask]
                     else:
-                        data = g[field][:][mask,...]
+                        data = g[field][:][mask, ...]
 
                     yield (ptype, field), data
             f.close()
@@ -127,16 +128,18 @@
         morton = np.empty(pcount, dtype='uint64')
         ind = 0
         for key in keys:
-            if not key.startswith("PartType"): continue
-            if "Coordinates" not in f[key]: continue
+            if not key.startswith("PartType"):
+                continue
+            if "Coordinates" not in f[key]:
+                continue
             ds = f[key]["Coordinates"]
-            dt = ds.dtype.newbyteorder("N") # Native
+            dt = ds.dtype.newbyteorder("N")  # Native
             pos = np.empty(ds.shape, dtype=dt)
             pos[:] = ds
             regions.add_data_file(pos, data_file.file_id,
                                   data_file.ds.filter_bbox)
-            morton[ind:ind+pos.shape[0]] = compute_morton(
-                pos[:,0], pos[:,1], pos[:,2],
+            morton[ind:ind + pos.shape[0]] = compute_morton(
+                pos[:, 0], pos[:, 1], pos[:, 2],
                 data_file.ds.domain_left_edge,
                 data_file.ds.domain_right_edge,
                 data_file.ds.filter_bbox)
@@ -151,7 +154,6 @@
         npart = dict(("PartType%s" % (i), v) for i, v in enumerate(pcount))
         return npart
 
-
     def _identify_fields(self, data_file):
         f = h5py.File(data_file.filename, "r")
         fields = []
@@ -205,8 +207,10 @@
         f.close()
         return fields, {}
 
+
 ZeroMass = object()
 
+
 class IOHandlerGadgetBinary(BaseIOHandler):
     _dataset_type = "gadget_binary"
     _vector_fields = (("Coordinates", 3),
@@ -231,13 +235,17 @@
     #   TSTP    (only if enabled in makefile)
 
     _var_mass = None
+    _format = None
 
     def __init__(self, ds, *args, **kwargs):
         self._vector_fields = dict(self._vector_fields)
         self._fields = ds._field_spec
         self._ptypes = ds._ptype_spec
         self.data_files = set([])
-        self._format =  _get_gadget_format(ds.parameter_filename)#default gadget format 1
+        gformat = _get_gadget_format(ds.parameter_filename)
+        # gadget format 1 original, 2 with block name
+        self._format = gformat[0]
+        self._endian = gformat[1]
         super(IOHandlerGadgetBinary, self).__init__(ds, *args, **kwargs)
 
     @property
@@ -266,8 +274,8 @@
                 # This is where we could implement sub-chunking
                 f.seek(poff[ptype, "Coordinates"], os.SEEK_SET)
                 pos = self._read_field_from_file(f,
-                            tp[ptype], "Coordinates")
-                yield ptype, (pos[:,0], pos[:,1], pos[:,2])
+                                                 tp[ptype], "Coordinates")
+                yield ptype, (pos[:, 0], pos[:, 1], pos[:, 2])
             f.close()
 
     def _read_particle_fields(self, chunks, ptf, selector):
@@ -282,11 +290,12 @@
             for ptype, field_list in sorted(ptf.items()):
                 f.seek(poff[ptype, "Coordinates"], os.SEEK_SET)
                 pos = self._read_field_from_file(f,
-                            tp[ptype], "Coordinates")
+                                                 tp[ptype], "Coordinates")
                 mask = selector.select_points(
-                    pos[:,0], pos[:,1], pos[:,2], 0.0)
+                    pos[:, 0], pos[:, 1], pos[:, 2], 0.0)
                 del pos
-                if mask is None: continue
+                if mask is None:
+                    continue
                 for field in field_list:
                     if field == "Mass" and ptype not in self.var_mass:
                         data = np.empty(mask.sum(), dtype="float64")
@@ -297,66 +306,90 @@
                         continue
                     f.seek(poff[ptype, field], os.SEEK_SET)
                     data = self._read_field_from_file(f, tp[ptype], field)
-                    data = data[mask,...]
+                    data = data[mask, ...]
                     yield (ptype, field), data
             f.close()
 
     def _read_field_from_file(self, f, count, name):
-        if count == 0: return
+        if count == 0:
+            return
         if name == "ParticleIDs":
-            dt = "uint32"
+            dt = self._endian + "u4"
         else:
-            dt = self._float_type
+            dt = self._endian + self._float_type
         if name in self._vector_fields:
             count *= self._vector_fields[name]
-        arr = np.fromfile(f, dtype=dt, count = count)
+        arr = np.fromfile(f, dtype=dt, count=count)
         if name in self._vector_fields:
             factor = self._vector_fields[name]
-            arr = arr.reshape((count//factor, factor), order="C")
-        return arr.astype("float64")
+            arr = arr.reshape((count // factor, factor), order="C")
+        return arr.astype(self._float_type)
+
+    def _get_morton_from_position(self, data_file, count, offset_count,
+                                  regions, DLE, DRE):
+        with open(data_file.filename, "rb") as f:
+            # We add on an additionally 4 for the first record.
+            f.seek(data_file._position_offset + 4 + offset_count * 12)
+            # The first total_particles * 3 values are positions
+            pp = np.fromfile(f, dtype=self._endian + self._float_type,
+                             count=count * 3)
+            pp.shape = (count, 3)
+            pp = pp.astype(self._float_type)
+        regions.add_data_file(pp, data_file.file_id,
+                                  data_file.ds.filter_bbox)
+        morton = compute_morton(pp[:, 0], pp[:, 1], pp[:, 2], DLE, DRE,
+                                data_file.ds.filter_bbox)
+        return morton
 
     def _initialize_index(self, data_file, regions):
-        count = sum(data_file.total_particles.values())
         DLE = data_file.ds.domain_left_edge
         DRE = data_file.ds.domain_right_edge
         self._float_type = data_file.ds._validate_header(data_file.filename)[1]
-        self._field_size = np.dtype(self._float_type).itemsize
-        with open(data_file.filename, "rb") as f:
-            # We add on an additionally 4 for the first record.
-            f.seek(data_file._position_offset + 4)
-            # The first total_particles * 3 values are positions
-            pp = np.fromfile(f, dtype=self._float_type, count=count*3)
-            pp.shape = (count, 3)
-        regions.add_data_file(pp, data_file.file_id, data_file.ds.filter_bbox)
-        morton = compute_morton(pp[:,0], pp[:,1], pp[:,2], DLE, DRE,
-                                data_file.ds.filter_bbox)
-        return morton
+        if self.index_ptype == "all":
+            count = sum(data_file.total_particles.values())
+            return self._get_morton_from_position(
+                data_file, count, 0, regions, DLE, DRE)
+        else:
+            idpos = self._ptypes.index(self.index_ptype)
+            count = data_file.total_particles.get(self.index_ptype)
+            account = [0] + [data_file.total_particles.get(ptype)
+                             for ptype in self._ptypes]
+            account = np.cumsum(account)
+            return self._get_morton_from_position(
+                data_file, account, account[idpos], regions, DLE, DRE)
 
     def _count_particles(self, data_file):
         npart = dict((self._ptypes[i], v)
-            for i, v in enumerate(data_file.header["Npart"]))
+                     for i, v in enumerate(data_file.header["Npart"]))
         return npart
 
     # header is 256, but we have 4 at beginning and end for ints
+    _field_size = 4
     def _calculate_field_offsets(self, field_list, pcount,
-                                 offset, file_size = None):
+                                 offset, file_size=None):
         # field_list is (ftype, fname) but the blocks are ordered
         # (fname, ftype) in the file.
-        pos = offset
+        if self._format == 2:
+            # Need to subtract offset due to extra header block
+            pos = offset - SNAP_FORMAT_2_OFFSET
+        else:
+            pos = offset
         fs = self._field_size
         offsets = {}
+
         for field in self._fields:
             if not isinstance(field, string_types):
                 field = field[0]
-            if not any( (ptype, field) in field_list
-                        for ptype in self._ptypes):
+            if not any((ptype, field) in field_list
+                       for ptype in self._ptypes):
                 continue
             if self._format == 2:
-                pos += 20 #skip block header
+                pos += 20  # skip block header
             elif self._format == 1:
                 pos += 4
             else:
-                raise RuntimeError("incorrect Gadget format %s!" % str(self._format))
+                raise RuntimeError(
+                    "incorrect Gadget format %s!" % str(self._format))
             any_ptypes = False
             for ptype in self._ptypes:
                 if field == "Mass" and ptype not in self.var_mass:
@@ -370,9 +403,10 @@
                 else:
                     pos += pcount[ptype] * fs
             pos += 4
-            if not any_ptypes: pos -= 8
+            if not any_ptypes:
+                pos -= 8
         if file_size is not None:
-            if (file_size != pos) & (self._format == 1): #ignore the rest of format 2 
+            if (file_size != pos) & (self._format == 1):  # ignore the rest of format 2
                 mylog.warning("Your Gadget-2 file may have extra " +
                               "columns or different precision!" +
                               " (%s file vs %s computed)",
@@ -385,13 +419,15 @@
         tp = domain.total_particles
         for i, ptype in enumerate(self._ptypes):
             count = tp[ptype]
-            if count == 0: continue
+            if count == 0:
+                continue
             m = domain.header["Massarr"][i]
             for field in self._fields:
                 if isinstance(field, tuple):
                     field, req = field
                     if req is ZeroMass:
-                        if m > 0.0 : continue
+                        if m > 0.0:
+                            continue
                     elif isinstance(req, tuple) and ptype in req:
                         pass
                     elif req != ptype:

diff -r e78cd10a6d5cb98eeba67425c7f984bd9c72bf9d -r 2cda15e1ef4d6accc2c6f295789a87a747d43331 yt/frontends/gadget/tests/test_outputs.py
--- a/yt/frontends/gadget/tests/test_outputs.py
+++ b/yt/frontends/gadget/tests/test_outputs.py
@@ -25,6 +25,7 @@
 
 isothermal_h5 = "IsothermalCollapse/snap_505.hdf5"
 isothermal_bin = "IsothermalCollapse/snap_505"
+BE_Gadget = "BigEndianGadgetBinary/BigEndianGadgetBinary"
 
 # This maps from field names to weight field names to use for projections
 iso_fields = OrderedDict(
@@ -41,13 +42,17 @@
 )
 iso_kwargs = dict(bounding_box=[[-3, 3], [-3, 3], [-3, 3]])
 
+
 @requires_file(isothermal_h5)
 @requires_file(isothermal_bin)
+ at requires_file(BE_Gadget)
 def test_GadgetDataset():
     assert isinstance(data_dir_load(isothermal_h5, kwargs=iso_kwargs),
                       GadgetHDF5Dataset)
     assert isinstance(data_dir_load(isothermal_bin, kwargs=iso_kwargs),
                       GadgetDataset)
+    assert isinstance(data_dir_load(BE_Gadget, kwargs=''),
+                      GadgetDataset)
 
 
 @requires_ds(isothermal_h5)

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.



More information about the yt-svn mailing list