[Yt-svn] yt-commit r1573 - trunk/yt/lagos

mturk at wrangler.dreamhost.com
Tue Jan 12 16:05:04 PST 2010


Author: mturk
Date: Tue Jan 12 16:05:01 2010
New Revision: 1573
URL: http://yt.enzotools.org/changeset/1573

Log:
Updating particle IO to avoid closing file handles between calls where possible.

A quick benchmark: in serial, reading 7 fields for 20 million particles
from 6500 grids (AMR) now takes about 5 seconds, where before it took
far longer!
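
The heart of the change is a filename-keyed cache wrapped around
H5Fopen: the reader keeps one HDF5 file handle open across grids and
only reopens when the filename changes. A minimal standalone sketch of
the pattern (the handle_cache type and function names here are
illustrative, not yt's actual particle_validation struct):

    #include <hdf5.h>
    #include <string.h>

    /* Illustrative cache: one open HDF5 handle, keyed by filename.
     * Initialize with { -1, "" } so the first call always opens. */
    typedef struct {
        hid_t file_id;          /* cached handle; -1 means nothing open */
        char  filename[1024];   /* name backing the cached handle */
    } handle_cache;

    /* Return a handle for filename, reopening only when it changes. */
    static hid_t cache_open(handle_cache *c, const char *filename)
    {
        if (strncmp(filename, c->filename, 1023) != 0) {
            if (c->file_id >= 0) H5Fclose(c->file_id);
            c->file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
            strncpy(c->filename, filename, 1023);
            c->filename[1023] = '\0';
        }
        return c->file_id;
    }

    /* Release the cached handle once a pass over the grids is done. */
    static void cache_close(handle_cache *c)
    {
        if (c->file_id >= 0) { H5Fclose(c->file_id); c->file_id = -1; }
    }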



Modified:
   trunk/yt/lagos/DataReadingFuncs.py
   trunk/yt/lagos/HDF5LightReader.c

Modified: trunk/yt/lagos/DataReadingFuncs.py
==============================================================================
--- trunk/yt/lagos/DataReadingFuncs.py	(original)
+++ trunk/yt/lagos/DataReadingFuncs.py	Tue Jan 12 16:05:01 2010
@@ -203,8 +203,9 @@
                         conv_factors):
         filenames = [g.filename for g in grid_list]
         ids = [g.id for g in grid_list]
+        filenames, ids = zip(*sorted(zip(filenames,ids)))
         return HDF5LightReader.ReadParticles(
-            rtype, fields, filenames, ids, conv_factors, args, 1)
+            rtype, fields, list(filenames), list(ids), conv_factors, args, 1)
 
     def modify(self, field):
         return field.swapaxes(0,2)

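The Python-side sort above is what makes the C-side cache pay off:
sorting the (filename, id) pairs makes grids stored in the same HDF5
file adjacent in the read order, so each file is opened once per pass
rather than once per grid. It also explains the list() casts in the
patch: zip(*sorted(...)) yields tuples, while ReadParticles expects
lists. For illustration only, the same grouping expressed in C (with a
hypothetical grid_ref type) would be a qsort keyed on the filename:

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical pairing of a grid id with the file storing it. */
    typedef struct { const char *filename; int id; } grid_ref;

    static int by_filename(const void *a, const void *b)
    {
        return strcmp(((const grid_ref *)a)->filename,
                      ((const grid_ref *)b)->filename);
    }

    /* qsort(grids, ngrids, sizeof(grid_ref), by_filename); puts
     * same-file grids next to each other, maximizing handle reuse. */
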
Modified: trunk/yt/lagos/HDF5LightReader.c
==============================================================================
--- trunk/yt/lagos/HDF5LightReader.c	(original)
+++ trunk/yt/lagos/HDF5LightReader.c	Tue Jan 12 16:05:01 2010
@@ -58,6 +58,8 @@
     int (*count_func_longdouble)(struct particle_validation_ *data);
     void *validation_reqs;
     void *particle_position[3];
+    hid_t file_id;
+    char filename[1024];
 } particle_validation;
 
 typedef struct region_validation_ {
@@ -746,6 +748,7 @@
     pv.particle_position[0] = pv.particle_position[1] = pv.particle_position[2] = NULL;
     pv.return_values = NULL;
     pv.npy_types = NULL;
+    pv.file_id = -1;
 
     /* Set initial values for pv */
     pv.stride_size = stride_size;
@@ -837,6 +840,11 @@
         goto _fail;
       }
     }
+    if(pv.file_id >= 0) {
+      H5Fclose(pv.file_id);
+      pv.file_id = -1;
+      strncpy(pv.filename, "Expired filename", 1023);
+    }
     /* Now we know how big to make our array, hooray. */
     pv.update_count = 0;
     
@@ -866,6 +874,7 @@
         goto _fail;
       }
     }
+    if(pv.file_id >= 0) {H5Fclose(pv.file_id); pv.file_id = -1;}
 
     /* Let's pack up our return values */
     PyObject *my_list = PyList_New(0);
@@ -1023,10 +1032,18 @@
 
     /* First we open the file */
 
-    file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+
+    if(strncmp(filename, pv->filename, 1023) != 0) {
+      //fprintf(stderr, "Comparison failed: %s , %s\n", filename, pv->filename);
+      if(pv->file_id >= 0) H5Fclose(pv->file_id);
+      pv->file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
+      strncpy(pv->filename, filename, 1023);
+      //fprintf(stderr, "Setting: %s , %s\n", filename, pv->filename);
+    }
+    file_id = pv->file_id;
     if (file_id < 0) {
         PyErr_Format(_hdf5ReadError,
-                 "ReadHDF5DataSet: Unable to open %s", filename);
+                 "run_validators: Unable to open %s (%d)", filename, read);
         goto _fail;
     }
 
@@ -1192,7 +1209,7 @@
       free(dataset_read);
     }
 
-    H5Fclose(file_id);
+    //H5Fclose(file_id); // We don't do this here, because we cache our file_id
 
     return 1;
 
@@ -1213,7 +1230,7 @@
       }
       free(dataset_read);
     }
-    if(!(file_id <= 0)&&(H5Iget_ref(file_id))) H5Fclose(file_id);
+    //if(!(file_id <= 0)&&(H5Iget_ref(file_id))) H5Fclose(file_id);
     
     return 0;
 }
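
One subtlety: the _fail path above also stops closing file_id,
presumably because the handle now belongs to the cache in the
particle_validation struct; closing it here without also resetting
pv->file_id and pv->filename would leave the cache pointing at a dead
handle on the next call. Cleanup instead happens at the two pv.file_id
checks added in the caller.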


