[Yt-svn] yt-commit r389 - in trunk: tests yt/lagos

mturk at wrangler.dreamhost.com mturk at wrangler.dreamhost.com
Thu Mar 27 10:41:48 PDT 2008


Author: mturk
Date: Thu Mar 27 10:41:47 2008
New Revision: 389
URL: http://yt.spacepope.org/changeset/389

Log:
Adding unit tests for the HDF5 reader, as well as extending the HDF5 reader to
work with every native type that HDF5 and NumPy have in common.



Added:
   trunk/tests/test_hdf5_reader.py
Modified:
   trunk/yt/lagos/HDF5LightReader.c

Added: trunk/tests/test_hdf5_reader.py
==============================================================================
--- (empty file)
+++ trunk/tests/test_hdf5_reader.py	Thu Mar 27 10:41:47 2008
@@ -0,0 +1,41 @@
+import unittest, numpy, tables, sys, os, os.path
+sys.path.insert(0,".")
+
+from new import classobj
+
+from yt.lagos.HDF5LightReader import ReadData, ReadingError
+
+my_dtypes = ['short','int','longlong','ushort','uint','ulonglong',
+             'float','double']
+
+class HDF5LightTestIOBase(object):
+    def setUp(self):
+        self.rand_array = numpy.random.random(3000).reshape((30,10,10)).astype(self.dtype)
+    def test_check_io(self):
+        my_table = tables.openFile("testing_h5lt_io.h5","w")
+        my_table.createArray("/","%s" % (self.dtype),self.rand_array)
+        my_table.close()
+        recv_array = ReadData("testing_h5lt_io.h5", "/%s" % (self.dtype))
+        self.assert_(numpy.all(recv_array == self.rand_array))
+    def tearDown(self):
+        os.unlink("testing_h5lt_io.h5")
+
+for dtype in my_dtypes:
+    temp = classobj("TestingIO_%s" % (dtype),
+            (HDF5LightTestIOBase,unittest.TestCase), {'dtype':dtype})
+    exec('TestingIO_%s = temp' % dtype)
+
+class HDF5LightTestError(unittest.TestCase):
+    def test_no_file(self):
+        fn = "%s.h5" % int(numpy.random.random(1) * 1e6)
+        self.assertRaises(ReadingError, ReadData,fn,"/Nothing")
+    def test_no_dataset(self):
+        fn = "%s.h5" % int(numpy.random.random(1) * 1e6)
+        my_table = tables.openFile("testing_h5lt_io.h5","w")
+        my_table.close()
+        self.assertRaises(ReadingError, ReadData,fn,"/Nothing")
+    def tearDown(self):
+        if os.path.exists("testing_h5lt_io.h5"): os.unlink("testing_h5lt_io.h5")
+
+if __name__ == "__main__":
+    unittest.main()

Modified: trunk/yt/lagos/HDF5LightReader.c
==============================================================================
--- trunk/yt/lagos/HDF5LightReader.c	(original)
+++ trunk/yt/lagos/HDF5LightReader.c	Thu Mar 27 10:41:47 2008
@@ -43,109 +43,168 @@
 {
     char *filename, *nodename;
 
-    hsize_t *my_dims;
-    hid_t file_id;
+    hsize_t *my_dims = NULL;
+    npy_intp *dims = NULL;
+    hid_t file_id, datatype_id, native_type_id, dataset;
     herr_t my_error;
+    htri_t file_exists;
     H5T_class_t class_id;
     size_t type_size;
-    int my_typenum, my_rank, i, my_size;
-    void *my_data;
-    PyArrayObject *my_array;
+    int my_typenum, my_rank, i;
+    H5E_auto_t err_func;
+    void *err_datastream;
+    PyArrayObject *my_array = NULL;
+    file_id = datatype_id = native_type_id = dataset = 0;
 
     if (!PyArg_ParseTuple(args, "ss",
             &filename, &nodename))
         return PyErr_Format(_hdf5ReadError,
                "ReadHDF5DataSet: Invalid parameters.");
 
-    //char* filename = *ofilename;
-    //char* nodename = *onodename;
+    /* How portable is this? */
+    if (access(filename, R_OK) < 0) {
+        PyErr_Format(_hdf5ReadError,
+                 "ReadHDF5DataSet: %s does not exist, or no read permissions\n",
+                     filename);
+        goto _fail;
+    }
+
+    file_exists = H5Fis_hdf5(filename);
+    if (file_exists == 0) {
+        PyErr_Format(_hdf5ReadError,
+                 "ReadHDF5DataSet: %s is not an HDF5 file", filename);
+        goto _fail;
+    }
 
     file_id = H5Fopen (filename, H5F_ACC_RDONLY, H5P_DEFAULT); 
-    
+
+    if (file_id < 0) {
+        PyErr_Format(_hdf5ReadError,
+                 "ReadHDF5DataSet: Unable to open %s", nodename);
+        goto _fail;
+    }
+
+    /* We turn off error reporting briefly, because it turns out that
+       reading datasets with group names is more forgiving than finding
+       datasets with group names using the high-level interface. */
+
+    H5Eget_auto(&err_func, &err_datastream);
+    H5Eset_auto(NULL, NULL);
+    dataset = H5Dopen(file_id, nodename);
+    H5Eset_auto(err_func, err_datastream);
+
+    if(dataset < 0) {
+        PyErr_Format(_hdf5ReadError,
+                 "ReadHDF5DataSet: Unable to open dataset (%s, %s).",
+                                    filename, nodename);
+        goto _fail;
+    }
+
     my_error = H5LTget_dataset_ndims ( file_id, nodename, &my_rank );
     if(my_error) {
         PyErr_Format(_hdf5ReadError,
-                 "ReadHDF5DataSet: Problem getting dataset info (%s, %s).",
+                 "ReadHDF5DataSet: Problem getting dataset rank (%s, %s).",
                                     filename, nodename);
         goto _fail;
     }
 
+    /* How do we keep this from leaking in failures? */
     my_dims = malloc(sizeof(hsize_t) * my_rank);
     my_error = H5LTget_dataset_info ( file_id, nodename,
                 my_dims, &class_id, &type_size );
-    if(my_error) {
+
+    if(my_error < 0) {
         PyErr_Format(_hdf5ReadError,
                  "ReadHDF5DataSet: Problem getting dataset info (%s, %s).",
                                     filename, nodename);
         goto _fail;
     }
 
-    my_size = 1;
-    npy_intp *dims = malloc(my_rank * sizeof(npy_intp));
-    for (i = 0; i < my_rank; i++) {
-      dims[i] = (npy_intp) my_dims[i];
-      my_size *= my_dims[i];
-    }
-
-    if (!(class_id == H5T_FLOAT)){
-      PyErr_Format(_hdf5ReadError,
-          "ReadHDF5DataSet: Unrecognized datatype, size %i.", type_size);
-      goto _fail;
-    }
-
-    /*
-    switch (type_size) {
-      case 4:
-        fprintf(stderr, "Reading (%i) %i\n", my_size, type_size);
-        my_typenum = NPY_FLOAT32;
-        H5LTread_dataset_float(file_id, nodename, my_data);
-        break;
-      case 8:
-        fprintf(stderr, "Reading (%i) %i\n", my_size, type_size);
-        my_typenum = NPY_FLOAT64;
-        H5LTread_dataset_double(file_id, nodename, my_data);
-        break;
-      default:
+    dims = malloc(my_rank * sizeof(npy_intp));
+    for (i = 0; i < my_rank; i++) dims[i] = (npy_intp) my_dims[i];
+
+   /*
+   http://terra.rice.edu/comp.res/apps/h/hdf5/docs/RM_H5T.html#Datatype-GetNativeType
+
+   hid_t H5Tget_native_type(hid_t type_id, H5T_direction_t direction  ) returns
+   from the following list:
+        H5T_NATIVE_CHAR         NPY_??
+        H5T_NATIVE_SHORT        NPY_SHORT
+        H5T_NATIVE_INT          NPY_INT
+        H5T_NATIVE_LONG         NPY_LONG
+        H5T_NATIVE_LLONG        NPY_LONGLONG
+
+        H5T_NATIVE_UCHAR        NPY_??
+        H5T_NATIVE_USHORT       NPY_USHORT
+        H5T_NATIVE_UINT         NPY_UINT
+        H5T_NATIVE_ULONG        NPY_ULONG
+        H5T_NATIVE_ULLONG       NPY_ULONGLONG
+
+        H5T_NATIVE_FLOAT        NPY_FLOAT
+        H5T_NATIVE_DOUBLE       NPY_DOUBLE
+        H5T_NATIVE_LDOUBLE      NPY_LONGDOUBLE
+    */
+
+    datatype_id = H5Dget_type(dataset);
+    native_type_id = H5Tget_native_type(datatype_id, H5T_DIR_ASCEND);
+
+    /* Behavior here is intentionally undefined for non-native types */
+    int my_desc_type;
+         if(H5Tequal(native_type_id, H5T_NATIVE_SHORT   ) > 0){my_desc_type = NPY_SHORT;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_INT     ) > 0){my_desc_type = NPY_INT;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_LONG    ) > 0){my_desc_type = NPY_LONG;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_LLONG   ) > 0){my_desc_type = NPY_LONGLONG;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_USHORT  ) > 0){my_desc_type = NPY_USHORT;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_UINT    ) > 0){my_desc_type = NPY_UINT;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_ULONG   ) > 0){my_desc_type = NPY_ULONG;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_ULLONG  ) > 0){my_desc_type = NPY_ULONGLONG;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_FLOAT   ) > 0){my_desc_type = NPY_FLOAT;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_DOUBLE  ) > 0){my_desc_type = NPY_DOUBLE;}
+    else if(H5Tequal(native_type_id, H5T_NATIVE_LDOUBLE ) > 0){my_desc_type = NPY_LONGDOUBLE;}
+    else {
+          PyErr_Format(_hdf5ReadError,
+                       "ReadHDF5DataSet: Unrecognized datatype.  Use a more advanced reader.");
+          goto _fail;
+    }
+
+    my_array = (PyArrayObject *) PyArray_SimpleNewFromDescr(my_rank, dims,
+                PyArray_DescrFromType(my_desc_type));
+    if (!my_array) {
         PyErr_Format(_hdf5ReadError,
-            "ReadHDF5DataSet: Unrecognized datatype, size %i.", type_size);
+                 "ReadHDF5DataSet: Unable to create NumPy array.");
         goto _fail;
-        break; //haha goto!
     }
-    */
-    my_array = (PyArrayObject *) PyArray_SimpleNewFromDescr(my_rank, dims,
-                PyArray_DescrFromType(NPY_DOUBLE));
 
-    H5LTread_dataset_double(file_id, nodename, (void *) my_array->data);
-    H5Fclose(file_id);
-    /*
-    my_array = (PyArrayObject *) PyArray_SimpleNewFromData(my_rank, dims,
-                                    NPY_FLOAT64, (void *)my_data);
-    */
+    H5LTread_dataset(file_id, nodename, native_type_id, (void *) my_array->data);
 
     PyArray_UpdateFlags(my_array, NPY_OWNDATA | my_array->flags);
-    // 'N' does not increase the reference count
     PyObject *return_value = Py_BuildValue("N", my_array);
 
-    free(dims);
+    H5Fclose(file_id);
+    H5Dclose(dataset);
+    H5Tclose(native_type_id);
+    H5Tclose(datatype_id);
     free(my_dims);
+    free(dims);
 
     return return_value;
 
     _fail:
       Py_XDECREF(my_array);
-      if(file_id) H5Fclose(file_id);
-      if(my_data) free(my_data);
-      if(my_dims) free(my_dims);
-      if(dims) free(dims);
+      if(!(file_id <= 0)&&(H5Iget_ref(file_id))) H5Fclose(file_id);
+      if(!(dataset <= 0)&&(H5Iget_ref(dataset))) H5Dclose(dataset);
+      if(!(native_type_id <= 0)&&(H5Iget_ref(native_type_id))) H5Tclose(native_type_id);
+      if(!(datatype_id <= 0)&&(H5Iget_ref(datatype_id))) H5Tclose(datatype_id);
+      if(my_dims != NULL) free(my_dims);
+      if(dims != NULL) free(dims);
       return NULL;
 }
 
 static PyMethodDef _hdf5LightReaderMethods[] = {
     {"ReadData", Py_ReadHDF5DataSet, METH_VARARGS},
-    {NULL, NULL} /* Sentinel */
+    {NULL, NULL} 
 };
 
-/* platform independent*/
 #ifdef MS_WIN32
 __declspec(dllexport)
 #endif
@@ -156,13 +215,6 @@
     m = Py_InitModule("HDF5LightReader", _hdf5LightReaderMethods);
     d = PyModule_GetDict(m);
     _hdf5ReadError = PyErr_NewException("HDF5LightReader.ReadingError", NULL, NULL);
-    PyDict_SetItemString(d, "error", _hdf5ReadError);
+    PyDict_SetItemString(d, "ReadingError", _hdf5ReadError);
     import_array();
 }
-
-/*
- * Local Variables:
- * mode: C
- * c-file-style: "python"
- * End:
- */



More information about the yt-svn mailing list