[yt-svn] commit/yt: samskillman: Adding grid_data_format into yt repository

Bitbucket commits-noreply at bitbucket.org
Fri May 4 13:14:41 PDT 2012


1 new commit in yt:


https://bitbucket.org/yt_analysis/yt/changeset/e70d7b287965/
changeset:   e70d7b287965
branch:      yt
user:        samskillman
date:        2012-05-04 22:06:57
summary:     Adding grid_data_format into yt repository
affected #:  8 files

diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/__init__.py
--- /dev/null
+++ b/yt/utilities/grid_data_format/__init__.py
@@ -0,0 +1,2 @@
+from conversion import *
+


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/conversion/__init__.py
--- /dev/null
+++ b/yt/utilities/grid_data_format/conversion/__init__.py
@@ -0,0 +1,3 @@
+from conversion_abc import Converter
+from conversion_athena import AthenaDistributedConverter, AthenaConverter
+


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/conversion/conversion_abc.py
--- /dev/null
+++ b/yt/utilities/grid_data_format/conversion/conversion_abc.py
@@ -0,0 +1,7 @@
+
+class Converter(object):
+    def __init__(self, basename, outname=None):
+        self.basename = basename
+        self.outname = outname
+    def convert(self):
+        pass


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/conversion/conversion_athena.py
--- /dev/null
+++ b/yt/utilities/grid_data_format/conversion/conversion_athena.py
@@ -0,0 +1,503 @@
+import os
+import weakref
+import numpy as na
+import h5py as h5
+from conversion_abc import *
+from glob import glob
+from collections import \
+    defaultdict
+from string import \
+    strip, \
+    rstrip
+from stat import \
+    ST_CTIME
+
+translation_dict = {}
+translation_dict['density'] = 'density'
+translation_dict['total_energy'] = 'specific_energy'
+translation_dict['velocity_x'] = 'velocity_x'
+translation_dict['velocity_y'] = 'velocity_y'
+translation_dict['velocity_z'] = 'velocity_z'
+translation_dict['cell_centered_B_x'] = 'mag_field_x'
+translation_dict['cell_centered_B_y'] = 'mag_field_y'
+translation_dict['cell_centered_B_z'] = 'mag_field_z'
+
+class AthenaDistributedConverter(Converter):
+    def __init__(self, basename, outname=None, source_dir=None, field_conversions=None):
+        self.fields = []
+        self.current_time=0.0
+        name = basename.split('.')
+        self.ddn = int(name[1])
+        if source_dir is None:
+            source_dir = './'
+        self.source_dir = source_dir+'/'
+        self.basename = name[0]
+        if outname is None:
+            outname = self.basename+'.%04i'%self.ddn+'.gdf'
+        self.outname = outname
+        if field_conversions is None:
+            field_conversions = {}
+        self.field_conversions = field_conversions
+        self.handle = None
+
+    def parse_line(self,line, grid):
+    #    print line
+        # grid is a dictionary
+        splitup = line.strip().split()
+        if "vtk" in splitup:
+            grid['vtk_version'] = splitup[-1]
+        elif "Really" in splitup:
+            grid['time'] = splitup[-1]
+            self.current_time = grid['time']
+        elif 'PRIMITIVE' in splitup:
+            grid['time'] = float(splitup[4].rstrip(','))
+            grid['level'] = int(splitup[6].rstrip(','))
+            grid['domain'] = int(splitup[8].rstrip(','))
+            self.current_time = grid['time']
+        elif "DIMENSIONS" in splitup:
+            grid['dimensions'] = na.array(splitup[-3:]).astype('int')
+        elif "ORIGIN" in splitup:
+            grid['left_edge'] = na.array(splitup[-3:]).astype('float64')
+        elif "SPACING" in splitup:
+            grid['dds'] = na.array(splitup[-3:]).astype('float64')
+        elif "CELL_DATA" in splitup:
+            grid["ncells"] = int(splitup[-1])
+        elif "SCALARS" in splitup:
+            field = splitup[1]
+            grid['read_field'] = field
+            grid['read_type'] = 'scalar'
+        elif "VECTORS" in splitup:
+            field = splitup[1]
+            grid['read_field'] = field
+            grid['read_type'] = 'vector'
+
+    def write_gdf_field(self, fn, grid_number, field, data):
+        f = self.handle
+        ## --------- Store Grid Data --------- ##
+        if 'grid_%010i'%grid_number not in f['data'].keys():
+            g = f['data'].create_group('grid_%010i'%grid_number)
+        else:
+            g = f['data']['grid_%010i'%grid_number]
+        name = field
+        try:
+            name = translation_dict[name]
+        except:
+            pass
+        # print 'Writing %s' % name
+        if not name in g.keys(): 
+            g.create_dataset(name,data=data)
+        
+
+
+    def read_and_write_hierarchy(self,basename, ddn, gdf_name):
+        """ Read Athena legacy vtk file from multiple cpus """
+        proc_names = glob(self.source_dir+'id*')
+        #print 'Reading a dataset from %i Processor Files' % len(proc_names)
+        N = len(proc_names)
+        grid_dims = na.empty([N,3],dtype='int64')
+        grid_left_edges = na.empty([N,3],dtype='float64')
+        grid_dds = na.empty([N,3],dtype='float64')
+        grid_levels = na.zeros(N,dtype='int64')
+        grid_parent_ids = -1*na.ones(N,dtype='int64')
+        grid_particle_counts = na.zeros([N,1],dtype='int64')
+
+        for i in range(N):
+            if i == 0:
+                fn = self.source_dir+'id%i/'%i + basename + '.%04i'%ddn + '.vtk'
+            else:
+                fn = self.source_dir+'id%i/'%i + basename + '-id%i'%i + '.%04i'%ddn + '.vtk'
+
+            print 'Reading file %s' % fn
+            f = open(fn,'rb')
+            grid = {}
+            grid['read_field'] = None
+            grid['read_type'] = None
+            table_read=False
+            line = f.readline()
+            while grid['read_field'] is None:
+                self.parse_line(line, grid)
+                if "SCALAR" in line.strip().split():
+                    break
+                if "VECTOR" in line.strip().split():
+                    break
+                if 'TABLE' in line.strip().split():
+                    break
+                if len(line) == 0: break
+                del line
+                line = f.readline()
+
+            if len(line) == 0: break
+            
+            if na.prod(grid['dimensions']) != grid['ncells']:
+                grid['dimensions'] -= 1
+                grid['dimensions'][grid['dimensions']==0]=1
+            if na.prod(grid['dimensions']) != grid['ncells']:
+                print 'product of dimensions %i not equal to number of cells %i' % \
+                      (na.prod(grid['dimensions']), grid['ncells'])
+                raise TypeError
+
+            # Append all hierarchy info before reading this grid's data
+            grid_dims[i]=grid['dimensions']
+            grid_left_edges[i]=grid['left_edge']
+            grid_dds[i]=grid['dds']
+            #grid_ncells[i]=grid['ncells']
+            del grid
+
+            f.close()
+            del f
+        f = self.handle 
+
+        ## --------- Begin level nodes --------- ##
+        g = f.create_group('gridded_data_format')
+        g.attrs['format_version']=na.float32(1.0)
+        g.attrs['data_software']='athena'
+        data_g = f.create_group('data')
+        field_g = f.create_group('field_types')
+        part_g = f.create_group('particle_types')
+        pars_g = f.create_group('simulation_parameters')
+
+
+        gles = grid_left_edges
+        gdims = grid_dims
+        dle = na.min(gles,axis=0)
+        dre = na.max(gles+grid_dims*grid_dds,axis=0)
+        glis = ((gles - dle)/grid_dds).astype('int64')
+        gris = glis + gdims
+
+        ddims = (dre-dle)/grid_dds[0]
+
+        # grid_left_index
+        gli = f.create_dataset('grid_left_index',data=glis)
+        # grid_dimensions
+        gdim = f.create_dataset('grid_dimensions',data=gdims)
+
+        # grid_level
+        level = f.create_dataset('grid_level',data=grid_levels)
+
+        ## ----------QUESTIONABLE NEXT LINE--------- ##
+        # This data needs two dimensions for now. 
+        part_count = f.create_dataset('grid_particle_count',data=grid_particle_counts)
+
+        # grid_parent_id
+        pids = f.create_dataset('grid_parent_id',data=grid_parent_ids)
+
+        ## --------- Done with top level nodes --------- ##
+
+        pars_g.attrs['refine_by'] = na.int64(1)
+        pars_g.attrs['dimensionality'] = na.int64(3)
+        pars_g.attrs['domain_dimensions'] = ddims
+        pars_g.attrs['current_time'] = self.current_time
+        pars_g.attrs['domain_left_edge'] = dle
+        pars_g.attrs['domain_right_edge'] = dre
+        pars_g.attrs['unique_identifier'] = 'athenatest'
+        pars_g.attrs['cosmological_simulation'] = na.int64(0)
+        pars_g.attrs['num_ghost_zones'] = na.int64(0)
+        pars_g.attrs['field_ordering'] = na.int64(1)
+        pars_g.attrs['boundary_conditions'] = na.int64([0]*6) # For Now
+
+        # Extra pars:
+        # pars_g.attrs['n_cells'] = grid['ncells']
+        pars_g.attrs['vtk_version'] = 1.0
+
+        # Add particle types
+        # Nothing to do here
+
+        # Add particle field attributes
+        #f.close()
+
+
+    def read_and_write_data(self, basename, ddn, gdf_name):
+        proc_names = glob(self.source_dir+'id*')
+        #print 'Reading a dataset from %i Processor Files' % len(proc_names)
+        N = len(proc_names)
+        for i in range(N):
+            if i == 0:
+                fn = self.source_dir+'id%i/'%i + basename + '.%04i'%ddn + '.vtk'
+            else:
+                fn = self.source_dir+'id%i/'%i + basename + '-id%i'%i + '.%04i'%ddn + '.vtk'
+            f = open(fn,'rb')
+            #print 'Reading data from %s' % fn
+            line = f.readline()
+            while line is not '':
+                # print line
+                if len(line) == 0: break
+                splitup = line.strip().split()
+
+                if "DIMENSIONS" in splitup:
+                    grid_dims = na.array(splitup[-3:]).astype('int')
+                    line = f.readline()
+                    continue
+                elif "CELL_DATA" in splitup:
+                    grid_ncells = int(splitup[-1])
+                    line = f.readline()
+                    if na.prod(grid_dims) != grid_ncells:
+                        grid_dims -= 1
+                        grid_dims[grid_dims==0]=1
+                    if na.prod(grid_dims) != grid_ncells:
+                        print 'product of dimensions %i not equal to number of cells %i' % \
+                              (na.prod(grid_dims), grid_ncells)
+                        raise TypeError
+                    break
+                else:
+                    del line
+                    line = f.readline()
+            read_table = False
+            while line is not '':
+                if len(line) == 0: break
+                splitup = line.strip().split()
+                if 'SCALARS' in splitup:
+                    field = splitup[1]
+                    if not read_table:
+                        line = f.readline() # Read the lookup table line
+                        read_table = True
+                    data = na.fromfile(f, dtype='>f4', count=grid_ncells).reshape(grid_dims,order='F')
+                    if i == 0:
+                        self.fields.append(field)
+                    # print 'writing field %s' % field
+                    self.write_gdf_field(gdf_name, i, field, data)
+                    read_table=False
+
+                elif 'VECTORS' in splitup:
+                    field = splitup[1]
+                    data = na.fromfile(f, dtype='>f4', count=3*grid_ncells)
+                    data_x = data[0::3].reshape(grid_dims,order='F')
+                    data_y = data[1::3].reshape(grid_dims,order='F')
+                    data_z = data[2::3].reshape(grid_dims,order='F')
+                    if i == 0:
+                        self.fields.append(field+'_x')
+                        self.fields.append(field+'_y')
+                        self.fields.append(field+'_z')
+
+                    # print 'writing field %s' % field
+                    self.write_gdf_field(gdf_name, i, field+'_x', data_x)
+                    self.write_gdf_field(gdf_name, i, field+'_y', data_y)
+                    self.write_gdf_field(gdf_name, i, field+'_z', data_z)
+                    del data, data_x, data_y, data_z
+                del line
+                line = f.readline()
+            f.close()
+            del f
+
+        f = self.handle 
+        field_g = f['field_types']
+        # Add Field Attributes
+        for name in self.fields:
+            tname = name
+            try:
+                tname = translation_dict[name]
+            except:
+                pass
+            this_field = field_g.create_group(tname)
+            if name in self.field_conversions.keys():
+                this_field.attrs['field_to_cgs'] = self.field_conversions[name]
+            else:
+                this_field.attrs['field_to_cgs'] = na.float64('1.0') # For Now
+            
+
+    def convert(self, hierarchy=True, data=True):
+        self.handle = h5.File(self.outname, 'a')
+        if hierarchy:
+            self.read_and_write_hierarchy(self.basename, self.ddn ,self.outname)
+        if data:
+            self.read_and_write_data(self.basename, self.ddn ,self.outname)
+        self.handle.close()
+
+class AthenaConverter(Converter):
+    def __init__(self, basename, outname=None, field_conversions=None):
+        self.fields = []
+        self.basename = basename
+        name = basename.split('.')
+        fn = '%s.%04i'%(name[0],int(name[1]))
+        self.ddn = int(name[1])
+        self.basename = fn
+        if outname is None:
+            outname = fn+'.gdf'
+        self.outname = outname
+        if field_conversions is None:
+            field_conversions = {}
+        self.field_conversions = field_conversions
+
+
+    def parse_line(self, line, grid):
+    #    print line
+        # grid is a dictionary
+        splitup = line.strip().split()
+        if "vtk" in splitup:
+            grid['vtk_version'] = splitup[-1]
+        elif "Really" in splitup:
+            grid['time'] = splitup[-1]
+        elif "DIMENSIONS" in splitup:
+            grid['dimensions'] = na.array(splitup[-3:]).astype('int')
+        elif "ORIGIN" in splitup:
+            grid['left_edge'] = na.array(splitup[-3:]).astype('float64')
+        elif "SPACING" in splitup:
+            grid['dds'] = na.array(splitup[-3:]).astype('float64')
+        elif "CELL_DATA" in splitup:
+            grid["ncells"] = int(splitup[-1])
+        elif "SCALARS" in splitup:
+            field = splitup[1]
+            grid['read_field'] = field
+            grid['read_type'] = 'scalar'
+        elif "VECTORS" in splitup:
+            field = splitup[1]
+            grid['read_field'] = field
+            grid['read_type'] = 'vector'
+        
+    def read_grid(self, filename):
+        """ Read Athena legacy vtk file from single cpu """
+        f = open(filename,'rb')
+        #print 'Reading from %s'%filename
+        grid = {}
+        grid['read_field'] = None
+        grid['read_type'] = None
+        table_read=False
+        line = f.readline()
+        while line is not '':
+            while grid['read_field'] is None:
+                self.parse_line(line, grid)
+                if grid['read_type'] is 'vector':
+                    break
+                if table_read is False:             
+                    line = f.readline()
+                if 'TABLE' in line.strip().split():
+                    table_read = True
+                if len(line) == 0: break
+            #    print line
+
+            if len(line) == 0: break
+            if na.prod(grid['dimensions']) != grid['ncells']:
+                grid['dimensions'] -= 1
+            if na.prod(grid['dimensions']) != grid['ncells']:
+                print 'product of dimensions %i not equal to number of cells %i' % \
+                      (na.prod(grid['dimensions']), grid['ncells'])
+                raise TypeError
+
+            if grid['read_type'] is 'scalar':
+                grid[grid['read_field']] = \
+                    na.fromfile(f, dtype='>f4', count=grid['ncells']).reshape(grid['dimensions'],order='F')
+                self.fields.append(grid['read_field'])
+            elif grid['read_type'] is 'vector':
+                data = na.fromfile(f, dtype='>f4', count=3*grid['ncells'])
+                grid[grid['read_field']+'_x'] = data[0::3].reshape(grid['dimensions'],order='F')
+                grid[grid['read_field']+'_y'] = data[1::3].reshape(grid['dimensions'],order='F')
+                grid[grid['read_field']+'_z'] = data[2::3].reshape(grid['dimensions'],order='F')
+                self.fields.append(grid['read_field']+'_x')
+                self.fields.append(grid['read_field']+'_y')
+                self.fields.append(grid['read_field']+'_z')
+            else:
+                raise TypeError
+            grid['read_field'] = None
+            grid['read_type'] = None
+            line = f.readline()
+            if len(line) == 0: break
+        grid['right_edge'] = grid['left_edge']+grid['dds']*(grid['dimensions'])
+        return grid
+
+    def write_to_gdf(self, fn, grid):
+        f = h5.File(fn,'a')
+
+        ## --------- Begin level nodes --------- ##
+        g = f.create_group('gridded_data_format')
+        g.attrs['format_version']=na.float32(1.0)
+        g.attrs['data_software']='athena'
+        data_g = f.create_group('data')
+        field_g = f.create_group('field_types')
+        part_g = f.create_group('particle_types')
+        pars_g = f.create_group('simulation_parameters')
+
+        dle = grid['left_edge'] # True only in this case of one grid for the domain
+        gles = na.array([grid['left_edge']])
+        gdims = na.array([grid['dimensions']])
+        glis = ((gles - dle)/grid['dds']).astype('int64')
+        gris = glis + gdims
+
+        # grid_left_index
+        gli = f.create_dataset('grid_left_index',data=glis)
+        # grid_dimensions
+        gdim = f.create_dataset('grid_dimensions',data=gdims)
+
+        levels = na.array([0]).astype('int64') # unigrid example
+        # grid_level
+        level = f.create_dataset('grid_level',data=levels)
+
+        ## ----------QUESTIONABLE NEXT LINE--------- ##
+        # This data needs two dimensions for now. 
+        n_particles = na.array([[0]]).astype('int64')
+        #grid_particle_count
+        part_count = f.create_dataset('grid_particle_count',data=n_particles)
+
+        # Assume -1 means no parent.
+        parent_ids = na.array([-1]).astype('int64')
+        # grid_parent_id
+        pids = f.create_dataset('grid_parent_id',data=parent_ids)
+
+        ## --------- Done with top level nodes --------- ##
+
+        f.create_group('hierarchy')
+
+        ## --------- Store Grid Data --------- ##
+
+        g0 = data_g.create_group('grid_%010i'%0)
+        for field in self.fields:
+            name = field
+            if field in translation_dict.keys():
+                name = translation_dict[name]
+            if not name in g0.keys(): 
+                g0.create_dataset(name,data=grid[field])
+
+        ## --------- Store Particle Data --------- ##
+
+        # Nothing to do
+
+        ## --------- Attribute Tables --------- ##
+
+        pars_g.attrs['refine_by'] = na.int64(1)
+        pars_g.attrs['dimensionality'] = na.int64(3)
+        pars_g.attrs['domain_dimensions'] = grid['dimensions']
+        try:
+            pars_g.attrs['current_time'] = grid['time']
+        except:
+            pars_g.attrs['current_time'] = 0.0
+        pars_g.attrs['domain_left_edge'] = grid['left_edge'] # For Now
+        pars_g.attrs['domain_right_edge'] = grid['right_edge'] # For Now
+        pars_g.attrs['unique_identifier'] = 'athenatest'
+        pars_g.attrs['cosmological_simulation'] = na.int64(0)
+        pars_g.attrs['num_ghost_zones'] = na.int64(0)
+        pars_g.attrs['field_ordering'] = na.int64(0)
+        pars_g.attrs['boundary_conditions'] = na.int64([0]*6) # For Now
+
+        # Extra pars:
+        pars_g.attrs['n_cells'] = grid['ncells']
+        pars_g.attrs['vtk_version'] = grid['vtk_version']
+
+        # Add Field Attributes
+        for name in g0.keys():
+            tname = name
+            try:
+                tname = translation_dict[name]
+            except:
+                pass
+            this_field = field_g.create_group(tname)
+        if name in self.field_conversions.keys():
+            this_field.attrs['field_to_cgs'] = self.field_conversions[name]
+        else:
+            this_field.attrs['field_to_cgs'] = na.float64('1.0') # For Now
+
+        # Add particle types
+        # Nothing to do here
+
+        # Add particle field attributes
+        f.close()
+
+    def convert(self):
+        grid = self.read_grid(self.basename+'.vtk')
+        self.write_to_gdf(self.outname,grid)
+        
+# import sys
+# if __name__ == '__main__':
+#     n = sys.argv[-1]
+#     n = n.split('.')
+#     fn = '%s.%04i'%(n[0],int(n[1]))
+#     grid = read_grid(fn+'.vtk')
+#     write_to_hdf5(fn+'.gdf',grid)
+    


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/docs/IRATE_notes.txt
--- /dev/null
+++ b/yt/utilities/grid_data_format/docs/IRATE_notes.txt
@@ -0,0 +1,39 @@
+Here is info from Erik Tollerud about the IRATE data format.
+
+The bitbucket project is at https://bitbucket.org/eteq/irate-format
+and I've posted a copy of the docs at
+http://www.physics.uci.edu/~etolleru/irate-docs/ , in particular
+http://www.physics.uci.edu/~etolleru/irate-docs/formatspec.html ,
+which details the actual requirements for data to fit in the format.
+As far as I can tell, the following steps are needed to make GDF fit
+inside the IRATE format:
+
+*move everything except "/simulation_parameters" into a group named "/GridData"
+
+*rename "/simulation_parameters" to "SimulationParameters"
+
+*remove the 'field_types' group (this is not absolutely necessary, but
+the convention we had in mind for IRATE is that the dataset names
+themselves (e.g. the datasets like /data/gridxxxxxx/density) serve as
+the field definitions.)
+
+* The unit information that's in 'field_types' should then be
+attributes in either "/GridData" or "/GridData/data" following the
+naming scheme e.g. "densityunitcgs" following the unit form given in
+the IRATE doc and an additional attribute e.g. "densityunitname"
+should be added with the human-readable name of the unit. This unit
+information can also live at the dataset level, but it probably makes
+more sense to put it instead at the higher level (IRATE supports both
+ways of doing it)
+
+* The Cosmology group (as defined in the IRATE specification) must be
+added - for simulations that are not technically "cosmological", you
+can just use one of the default cosmologies (WMAP7 is a reasonable
+choice - there's a function in the IRATE tools that automatically
+takes care of all the details for this).
+
+* optional: redo all the group names to follow the CamelCase
+convention - that's what we've been using elsewhere in IRATE.  This is
+an arbitrary choice, but it would be nice for it to be consistent
+throughout the format.
+


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/docs/gdf_specification.txt
--- /dev/null
+++ b/yt/utilities/grid_data_format/docs/gdf_specification.txt
@@ -0,0 +1,282 @@
+Gridded Data Format
+===================
+
+This is a pre-release of version 1.0 of this format.  Lots of formats have come
+before, but this one is simple and will work with yt; the idea is to create an
+import and export function in yt that will read this, so that other codes (such
+as ZEUS-MP) can export directly to it or convert their data to it, and so that
+yt can export to it from any format it recognizes and reads.
+
+Caveats and Notes
+-----------------
+
+#. We avoid having many attributes on many nodes, as access can be quite slow
+#. Cartesian data only for now
+#. All grids must have the same number of ghost zones.
+#. If “/grid_parent” does not exist, parentage relationships will be
+   reconstructed and assumed to allow multiple grids
+#. No parentage can skip levels
+#. All grids are at the same time
+#. This format is designed for single-fluid calculations (with color fields)
+   but it should be viewed as extensible to multiple-fluids.
+#. All fluid quantities are assumed to be in every grid, filling every zone.  Inside
+   a given grid, for a given particle type, all the affiliated fields must be the
+   same length.  (i.e., dark matter's velocity must be the same in all dimensions.)
+#. Everything is in a single file; for extremely large datasets, the user may
+   utilize HDF5 external links to link to files other than the primary.  (This
+   enables, for instance, Enzo datasets to have only a thin wrapper that creates
+   this format.)
+#. All fluid fields in this version of the format are assumed to have the
+   dimensionality of the grid they reside in plus any ghost zones, plus any
+   additional dimensionality required by the staggering property.
+#. Particles may have dataspaces affiliated with them.  (See Enzo's
+   OutputParticleTypeGrouping for more information.)  This enables a light
+   wrapper around data formats with interspersed particle types.
+#. Boundary conditions are very simply specified -- future revisions
+   will feature more complicated and rich specifications for the boundary.
+
+Furthermore, we make a distinction between fluid quantities and particle
+quantities.  Particles remain affiliated with grid nodes.  Positions of
+particles are global, but this will change with future versions of this
+document.
+
+Format Declaration
+------------------
+
+The file type is HDF5.  We require version 1.8 or greater.  At the root level,
+this group must exist: ::
+
+   /gridded_data_format
+
+This must contain the (float) attribute ``format_version``.  This document
+describes version 1.0.  Optional attributes may exist:
+
+``data_software``
+   string, references the application creating the file, not the
+   author of the data
+``data_software_version``
+   string, should reference a unique version number
+``data_author``
+   string, references the person or persons who created the data,
+   should include an email address
+``data_comment``
+   string, anything about the data
+
+Top Level Nodes
+---------------
+
+At least five top-level groups must exist, although some may be empty. ::
+
+   /gridded_data_format
+   /data
+   /simulation_parameters
+   /field_types
+   /particle_types
+
+Additionally, the grid structure elements must exist.  The 0-indexed index into this array
+defines a unique "Grid ID".
+
+``/grid_left_index``
+   (int64, Nx3): global, relative to current level, and only the active region
+``/grid_dimensions``
+   (int64, Nx3): only the active regions
+``/grid_level``
+   (int64, N): level, indexed by zero
+``/grid_particle_count``
+   (int64, N): total number of particles.  (May change in subsequent versions.)
+``/grid_parent_id``
+   (int64, N): optional, may only reference a single parent
+
+Grid Fields
+-----------
+
+Underneath ``/data/`` there must be entries for every grid, of the format
+``/data/grid_%010i``.  These grids need no attributes, and underneath them
+datasets live.
+
+Fluid Fields
+++++++++++++
+
+For every grid we then define ``/data/grid_%010i/%(field)s``.
+
+Where ``%(field)s`` draws from all of the fields defined.  We define no
+standard for which fields must be present, only the names and units.  Units
+should always be ''proper'' cgs (or conversion factors should be supplied, below), and
+field names should be drawn from this list, with these names.  Not all fields
+must be represented.  Field must extend beyond the active region if ghost zones
+are included.  All pre-defined fields are assumed to be cell-centered unless this
+is overridden in ``field_types``.
+
+  * ``density`` (g/cc)
+  * ``temperature`` (K)
+  * ``specific_thermal_energy`` (erg/g)
+  * ``specific_energy`` (erg/g, includes kinetic and magnetic)
+  * ``magnetic_energy`` (erg/g)
+  * ``velocity_x`` (cm/s)
+  * ``velocity_y`` (cm/s)
+  * ``velocity_z`` (cm/s)
+  * ``species_density_%s`` (g/cc) where %s is the species name including ionization
+    state, such as H2I, HI, HII, CO, "elec" for electron
+  * ``mag_field_x``
+  * ``mag_field_y``
+  * ``mag_field_z``
+
+Particle Fields
++++++++++++++++
+
+Particles are more expensive to sort and identify based on "type" -- for
+instance, dark matter versus star particles.  The particles should be separated
+based on type, under the group ``/data/grid_%010i/particles/``.
+
+The particles group will have sub-groups, each of which will be named after the
+type of particle it represents.  We only specify "dark_matter" as a type;
+anything else must be specified as described below.
+
+Each node, for instance ``/data/grid_%010i/particles/dark_matter/``, must
+contain the following fields:
+
+  * ``mass`` (g)
+  * ``id``
+  * ``position_x`` (in physical units)
+  * ``position_y`` (in physical units)
+  * ``position_z`` (in physical units)
+  * ``velocity_x`` (cm/s)
+  * ``velocity_y`` (cm/s)
+  * ``velocity_z`` (cm/s)
+  * ``dataspace`` (optional) an HDF5 dataspace to be used when opening
+    all affiliated fields.   If this is to be used, it must be appropriately set in
+    the particle type definition.  This is of type ``H5T_STD_REF_DSETREG``.
+    (See Enzo's OutputParticleTypeGrouping for an example.)
+
+Additional Fields
++++++++++++++++++
+
+Any additional fields from the data can be added, but must have a corresponding
+entry in the root field table (described below.)  The naming scheme is to be as
+explicit as possible, with units in cgs (or a conversion factor to the standard
+cgs unit, in the field table.)
+
+Attribute Table
+---------------
+
+In the root node, we define several groups which contain attributes.
+
+Simulation Parameters
++++++++++++++++++++++
+
+These attributes will all be associated with ``/simulation_parameters``.
+
+``refine_by``
+   relative global refinement
+``dimensionality``
+   1-, 2- or 3-D data
+``domain_dimensions``
+   dimensions in the top grid
+``current_time``
+   current time in simulation, in seconds, from “start” of simulation
+``domain_left_edge``
+   the left edge of the domain, in cm
+``domain_right_edge``
+   the right edge of the domain, in cm
+``unique_identifier``
+   regarded as a string, but can be anything
+``cosmological_simulation``
+   0 or 1
+``num_ghost_zones``
+   integer
+``field_ordering``
+   integer: 0 for C, 1 for Fortran
+``boundary_conditions``
+   integer (6): 0 for periodic, 1 for mirrored, 2 for outflow.  Needs one for each face
+   of the cube.  Any past the dimensionality should be set to -1.  The order of specification
+   goes left in 0th dimension, right in 0th dimension, left in 1st dimension, right in 1st dimensions,
+   left in 2nd dimension, right in 2nd dimension.  Note also that yt does not currently support non-periodic
+   boundary conditions, and that the assumption of periodicity shows up primarily in plots and
+   covering grids.
+
+Optionally, attributes for cosmological simulations can be provided, if
+cosmological_simulation above is set to 1:
+
+  * current_redshift
+  * omega_matter (at z=0)
+  * omega_lambda (at z=0)
+  * hubble_constant (h100)
+
+Fluid Field Attributes
+++++++++++++++++++++++
+
+Every field that is included that is not both in CGS already and in the list
+above requires parameters.  If a field is in the above list but is not in CGS,
+only the field_to_cgs attribute is necessary.  These will be stored under
+``/field_types`` and each must possess the following attributes:
+
+``field_name``
+   a string that will be used to describe the field; can contain spaces.
+``field_to_cgs``
+   a float that will be used to convert the field to cgs units, if necessary.
+   Set to 1.0 if no conversion necessary.  Note that if non-CGS units are desired
+   this field should simply be viewed as the value by which field values are
+   multiplied to get to some internally consistent unit system.
+``field_units``
+   a string that names the units.
+``staggering``
+   an integer: 0 for cell-centered, 1 for face-centered, 2 for vertex-centered.
+   Non-cellcentered data will be linearly-interpolated; more complicated
+   reconstruction will be defined in a future version of this standard; for 1.0
+   we only allow for simple definitions.
+
+Particle Types
+++++++++++++++
+
+Every particle type that is not recognized (i.e., all non-Dark Matter types)
+needs to have an entry under ``/particle_types``.  Each entry must possess the
+following attributes:
+
+``particle_type_name``
+   a string that will be used to describe the field; can contain spaces.
+``particle_use_dataspace``
+   (optional) if 1, the dataspace (see particle field definition above) will be used
+   for all particle fields for this type of particle.  Useful if a given type of particle
+   is embedded inside a larger list of different types of particle.
+``particle_type_num``
+   an integer giving the total number of particles of this type.
+
+For instance, to define a particle of type ``accreting_black_hole``, the file
+must contain ``/particle_types/accreting_black_hole``, with the
+``particle_type_name`` attribute of "Accreting Black Hole".
+
+Particle Field Attributes
++++++++++++++++++++++++++
+
+Every particle type that contains a new field (for instance, ``accretion_rate``)
+needs to have an entry under ``/particle_types/{particle_type_name}/{field_name}``
+containing the following attributes:
+
+``field_name``
+   a string that will be used to describe the field; can contain spaces.
+``field_to_cgs``
+   a float that will be used to convert the field to cgs units, if necessary.
+   Set to 1.0 if no conversion necessary.
+``field_units``
+   a string that names the units.
+
+Role of YT
+----------
+
+yt will provide a reader for this data, so that any data in this format can be
+used by the code.  Additionally, the names and specifications in this code
+reflect the internal yt data structures.
+
+yt will also provide a writer for this data, which will operate on any existing
+data format.  Provided that a simulation code can read this data, this will
+enable cross-platform comparison.  Furthermore, any external piece of software
+(i.e., Stranger) that implements reading this format will be able to read any
+format of data that yt understands.
+
+Example File
+------------
+
+An example file constructed from the ``RD0005-mine`` dataset is available
+at http://yt.enzotools.org/files/RD0005.gdf .  It is not yet a complete
+conversion, but it is a working proof of concept.  Readers and writers are
+forthcoming.


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/scripts/convert_distributed_athena.py
--- /dev/null
+++ b/yt/utilities/grid_data_format/scripts/convert_distributed_athena.py
@@ -0,0 +1,22 @@
+from grid_data_format import *
+import sys
+# Assumes that last input is the basename for the athena dataset.
+# i.e. kh_3d_mhd_hlld_128_beta5000_sub_tanhd.0030
+basename = sys.argv[-1]
+converter = AthenaDistributedConverter(basename)
+converter.convert()
+
+# If you have units information, set up a conversion dictionary for
+# each field.  Each key is the name of the field that Athena uses.
+# Each value is what you have to multiply the raw output from Athena
+# by to get cgs units.
+
+# code_to_cgs = {'density':1.0e3,
+# 	       'total_energy':1.0e-3,
+# 	       'velocity_x':1.2345,
+# 	       'velocity_y':1.2345,
+# 	       'velocity_z':1.2345}
+
+# converter = AthenaDistributedConverter(basename, field_conversions=code_to_cgs)
+# converter.convert()
+


diff -r 3601a7af7ffa72d3c1bdd714b242fc6484b1ea6a -r e70d7b2879653dddec7df54651dee99d4931efbd yt/utilities/grid_data_format/scripts/convert_single_athena.py
--- /dev/null
+++ b/yt/utilities/grid_data_format/scripts/convert_single_athena.py
@@ -0,0 +1,23 @@
+from grid_data_format import *
+import sys
+# Assumes that last input is the basename for the athena dataset.
+# i.e. kh_3d_mhd_hlld_128_beta5000_sub_tanhd.0030
+basename = sys.argv[-1]
+converter = AthenaConverter(basename)
+converter.convert()
+
+# If you have units information, set up a conversion dictionary for
+# each field.  Each key is the name of the field that Athena uses.
+# Each value is what you have to multiply the raw output from Athena
+# by to get cgs units.
+
+# code_to_cgs = {'density':1.0e3,
+# 	       'total_energy':1.0e-3,
+# 	       'velocity_x':1.2345,
+# 	       'velocity_y':1.2345,
+# 	       'velocity_z':1.2345}
+
+# converter = AthenaDistributedConverter(basename, field_conversions=code_to_cgs)
+# converter.convert()
+
+

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.



More information about the yt-svn mailing list