[yt-users] pickling clumps

Charles Hansen chansen at astro.berkeley.edu
Fri May 28 11:32:12 PDT 2010


pf.h.save_object does work as advertised for me.  Thanks for the 
pointer.  For what it is worth, clumps do not have the attribute 
save_object, so masterclump.save_object is not a valid function.

I tried pickling again and I can no longer even dump the clumps.  I 
don't know why the dumping 'worked' before and not now, though I have 
been trying it on more complicated clumps than before.  I have pasted the 
error message below in case you are interested.

In [6]: pickle.dump(masterclump, outfile)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)

/nics/c/home/chansen/lib/yt/src/yt-trunk-svn/scripts/iyt in <module>()
----> 1
      2
      3
      4
      5

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in dump(obj, file, 
protocol)
   1360
   1361 def dump(obj, file, protocol=None):
-> 1362     Pickler(file, protocol).dump(obj)
   1363
   1364 def dumps(obj, protocol=None):

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in dump(self, obj)
    222         if self.proto >= 2:
    223             self.write(PROTO + chr(self.proto))
--> 224         self.save(obj)
    225         self.write(STOP)
    226

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    329
    330         # Save the reduce() output and finally memoize the object

--> 331         self.save_reduce(obj=obj, *rv)
    332
    333     def persistent_id(self, obj):

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
save_reduce(self, func, args, state, listitems, dictitems, obj)
    399         else:
    400             save(func)
--> 401             save(args)
    402             write(REDUCE)
    403

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self, 
obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563
    564         if id(obj) in memo:

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self, obj)
    598
    599         self.memoize(obj)
--> 600         self._batch_appends(iter(obj))
    601
    602     dispatch[ListType] = save_list

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
_batch_appends(self, items)
    613         if not self.bin:
    614             for x in items:
--> 615                 save(x)
    616                 write(APPEND)
    617             return

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    329
    330         # Save the reduce() output and finally memoize the object

--> 331         self.save_reduce(obj=obj, *rv)
    332
    333     def persistent_id(self, obj):

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
save_reduce(self, func, args, state, listitems, dictitems, obj)
    399         else:
    400             save(func)
--> 401             save(args)
    402             write(REDUCE)
    403

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self, 
obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563
    564         if id(obj) in memo:

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self, obj)
    598
    599         self.memoize(obj)
--> 600         self._batch_appends(iter(obj))
    601
    602     dispatch[ListType] = save_list

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
_batch_appends(self, items)
    613         if not self.bin:
    614             for x in items:
--> 615                 save(x)
    616                 write(APPEND)
    617             return

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    329
    330         # Save the reduce() output and finally memoize the object

--> 331         self.save_reduce(obj=obj, *rv)
    332
    333     def persistent_id(self, obj):

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
save_reduce(self, func, args, state, listitems, dictitems, obj)
    399         else:
    400             save(func)
--> 401             save(args)
    402             write(REDUCE)
    403

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self, 
obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563
    564         if id(obj) in memo:

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self, obj)
    598
    599         self.memoize(obj)
--> 600         self._batch_appends(iter(obj))
    601
    602     dispatch[ListType] = save_list

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
_batch_appends(self, items)
    613         if not self.bin:
    614             for x in items:
--> 615                 save(x)
    616                 write(APPEND)
    617             return

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
    329
    330         # Save the reduce() output and finally memoize the object

--> 331         self.save_reduce(obj=obj, *rv)
    332
    333     def persistent_id(self, obj):

/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in 
save_reduce(self, func, args, state, listitems, dictitems, obj)
    403
    404         if obj is not None:
--> 405             self.memoize(obj)
    406
    407         # More new special cases (that work with older protocols as


/nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in memoize(self, obj)
    242         if self.fast:
    243             return
--> 244         assert id(obj) not in self.memo
    245         memo_len = len(self.memo)
    246         self.write(self.put(memo_len))

AssertionError:



Matthew Turk wrote:
> Hi Britton, Charles,
>
> Britton is correct, YT has its own routine for pickling.  This is a
> multi-step process, because the pickling of parameter
> files is somewhat ill-defined as a problem.  When you initialize a
> parameter file, it's given a (likely) unique hash that corresponds to
> its location, some of its contents, last modified time (or in Enzo's
> case, the CurrentTimeIdentifier in the parameter file) and this is
> stored in ~/.yt/parameter_files.csv.  By this mechanism, one can store
> the pointer to a parameter file that exists on disk with a
> bidirectional hash.  When a new parameter file is opened, it checks to
> see if the path location in the .csv file needs to be updated, and if
> so, it does.
>
> So when pickling a YT object, it stores the minimum set of information
> necessary to recreate that object.  For a sphere, region, etc etc,
> this is really just the necessary arguments to recreate the empty
> object, along with the hash of the parameter file that it belongs to.
> So the actual information necessary to pickle simple objects is very
> small: hash, arguments.  (Called "_con_args" in the source, for
> Constructor Arguments.)  When this is unpickled, the parameter file is
> recreated (or grabbed from memory, if it has already been
> instantiated) and then the object is recreated.
>
> For more complicated objects, like Clumps and ExtractedSets, we have
> to store more information -- because the constructor arguments are
> much larger and more complicated.  But the basic idea is the same.
>
> When using save_object, the same pickling routine gets called, but
> instead of being loosely affiliated and put into a separate file, it
> gets stored as a dataset in the corresponding .yt file, which is
> itself an HDF5 file.  So the results should be the same, except
> save_object and load_object remove the extra file from the equation.
>
> The error you're seeing above seems to be related to an incompletely
> saved or corrupted file; can you replicate this in a very simple,
> quick fashion?  I would suggest getting a clump that is very, very
> small, taken from a small data region, and seeing if that will pickle
> and unpickle correctly.  If it does, then we know that perhaps the
> previous pickle file was corrupted, or that something is wrong with
> the yt pickling protocol.  If it doesn't work, then we *know*
> something is wrong with the yt pickling protocol.
>
> Best,
>
> Matt
>
> On Thu, May 27, 2010 at 6:43 PM, Britton Smith <brittonsmith at gmail.com> wrote:
>   
>> Hi Chris,
>>
>> YT has its own routine designed to pickle data objects for saving and
>> reloading.  For a given pf, you can do pf.h.save_object(object, name) and
>> the object will be saved to the .yt file associated with the dataset.  You
>> can then get it back with object = pf.h.load_object(name).  I have used this
>> successfully to save the master_clump structure from clump finding.
>>
>> Britton
>>
>> On Thu, May 27, 2010 at 9:26 PM, Charles Hansen <chansen at astro.berkeley.edu>
>> wrote:
>>     
>>> The find_clumps function is working correctly for me I believe, but the
>>> analysis takes a very long time and I'd like to save the master_clump object
>>> (from
>>> http://yt.enzotools.org/doc/cookbook/recipes.html#cookbook-find-clumps) for
>>> further analysis.  I've tried pickling master_clump.  It dumps correctly,
>>> but gives an EOF error (below) on loading.  Is there another way to save
>>> master_clump?
>>>
>>> Thanks,
>>> Charles
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load(file)
>>>   1368
>>>   1369 def load(file):
>>> -> 1370     return Unpickler(file).load()
>>>   1371
>>>   1372 def loads(str):
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load(self)
>>>    856             while 1:
>>>    857                 key = read(1)
>>> --> 858                 dispatch[key](self)
>>>    859         except _Stop, stopinst:
>>>    860             return stopinst.value
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load_eof(self)
>>>    878
>>>    879     def load_eof(self):
>>> --> 880         raise EOFError
>>>    881     dispatch[''] = load_eof
>>>    882
>>>
>>> EOFError:
>>>
>>> _______________________________________________
>>> yt-users mailing list
>>> yt-users at lists.spacepope.org
>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>       
>> _______________________________________________
>> yt-users mailing list
>> yt-users at lists.spacepope.org
>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>
>>
>>     
> _______________________________________________
> yt-users mailing list
> yt-users at lists.spacepope.org
> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>   




More information about the yt-users mailing list