[yt-svn] commit/yt: jzuhone: Merged in xarthisius/yt (pull request #2503)

commits-noreply at bitbucket.org commits-noreply at bitbucket.org
Tue Jan 31 10:35:39 PST 2017


1 new commit in yt:

https://bitbucket.org/yt_analysis/yt/commits/dc439e4d8dbb/
Changeset:   dc439e4d8dbb
Branch:      yt
User:        jzuhone
Date:        2017-01-31 18:35:33+00:00
Summary:     Merged in xarthisius/yt (pull request #2503)

Add checksum property to the Dataset class
Affected #:  2 files

diff -r 9011f189f5aa3a1a0221f2e2f28cbe9de17b485f -r dc439e4d8dbb93b261ebc8c064572523311d56ce yt/data_objects/static_output.py
--- a/yt/data_objects/static_output.py
+++ b/yt/data_objects/static_output.py
@@ -348,6 +348,49 @@
             return hashlib.md5(s.encode('utf-8')).hexdigest()
         except ImportError:
             return s.replace(";", "*")
+   
+    _checksum = None
+    @property
+    def checksum(self):
+        '''
+        Computes md5 sum of a dataset.
+
+        Note: Currently this property is unable to determine a complete set of
+        files that are a part of a given dataset. As a first approximation, the
+        checksum of :py:attr:`~parameter_file` is calculated. In case
+        :py:attr:`~parameter_file` is a directory, checksum of all files inside
+        the directory is calculated.
+        '''
+        if self._checksum is None:
+            try:
+                import hashlib
+            except ImportError:
+                self._checksum = 'nohashlib'
+                return self._checksum
+
+            def generate_file_md5(m, filename, blocksize=2**20):
+                with open(filename , "rb") as f:
+                    while True:
+                        buf = f.read(blocksize)
+                        if not buf:
+                            break
+                        m.update(buf)
+
+            m = hashlib.md5()
+            if os.path.isdir(self.parameter_filename):
+                for root, _, files in os.walk(self.parameter_filename):
+                    for fname in files:
+                        fname = os.path.join(root, fname)
+                        generate_file_md5(m, fname)
+            elif os.path.isfile(self.parameter_filename):
+                generate_file_md5(m, self.parameter_filename)
+            else:
+                m = 'notafile'
+
+            if hasattr(m, 'hexdigest'):
+                m = m.hexdigest()
+            self._checksum = m
+        return self._checksum
 
     domain_left_edge = MutableAttribute()
     domain_right_edge = MutableAttribute()

diff -r 9011f189f5aa3a1a0221f2e2f28cbe9de17b485f -r dc439e4d8dbb93b261ebc8c064572523311d56ce yt/data_objects/tests/test_dataset_access.py
--- a/yt/data_objects/tests/test_dataset_access.py
+++ b/yt/data_objects/tests/test_dataset_access.py
@@ -4,7 +4,10 @@
     assert_equal, \
     fake_amr_ds, \
     fake_particle_ds, \
-    fake_random_ds
+    fake_random_ds, \
+    requires_file
+from yt.utilities.answer_testing.framework import \
+    data_dir_load
 
 # This will test the "dataset access" method.
 
@@ -69,3 +72,10 @@
 
     pds = fake_particle_ds(npart=128)
     assert pds.particle_type_counts == {'io': 128}
+
+
+g30 = "IsolatedGalaxy/galaxy0030/galaxy0030"
+@requires_file(g30)
+def test_checksum():
+    assert fake_random_ds(16).checksum == 'notafile'
+    assert data_dir_load(g30).checksum == '6169536e4b9f737ce3d3ad440df44c58'

Repository URL: https://bitbucket.org/yt_analysis/yt/

--

This is a commit notification from bitbucket.org. You are receiving
this because you have the service enabled, addressing the recipient of
this email.



More information about the yt-svn mailing list