[yt-users] pickling clumps

Matthew Turk matthewturk at gmail.com
Fri May 28 11:49:57 PDT 2010


Hi Charles,

This feels a bit more like a file system error.  What happens if you
use the cPickle module instead of the pickle module?

-Matt

On Fri, May 28, 2010 at 11:46 AM, Charles Hansen
<chansen at astro.berkeley.edu> wrote:
> sorry, the preceding line was
>
> outfile = open("dummy.pkl", "wb")
>
> The type is...
>
> In [73]: type(outfile)
> Out[73]: <type 'file'>
>
>
> Matthew Turk wrote:
>>
>> Hi Charles,
>>
>> What's "outfile" in this case?  Can you send a bit more of your setup
>> of the problem, or at least the output of "type(outfile)"?
>>
>> -Matt
>>
>> On Fri, May 28, 2010 at 11:32 AM, Charles Hansen
>> <chansen at astro.berkeley.edu> wrote:
>>
>>>
>>> pf.h.save_object does work as advertised for me.  Thanks for the pointer.
>>>  For what it is worth, clumps do not have the attribute save_object, so
>>> masterclump.save_object is not a valid function.
>>>
>>> I tried pickling again and I can no longer even dump the clumps.  I don't
>>> know why the dumping 'worked' before and not now, though I have been
>>> trying
>>> it on more complicated clumps than before.  I pasted the error message if
>>> you are interested.
>>>
>>> In [6]: pickle.dump(masterclump, outfile)
>>>
>>> ---------------------------------------------------------------------------
>>> AssertionError                            Traceback (most recent call
>>> last)
>>>
>>> /nics/c/home/chansen/lib/yt/src/yt-trunk-svn/scripts/iyt in <module>()
>>> ----> 1
>>>    2
>>>    3
>>>    4
>>>    5
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in dump(obj, file,
>>> protocol)
>>>  1360
>>>  1361 def dump(obj, file, protocol=None):
>>> -> 1362     Pickler(file, protocol).dump(obj)
>>>  1363
>>>  1364 def dumps(obj, protocol=None):
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in dump(self, obj)
>>>  222         if self.proto >= 2:
>>>  223             self.write(PROTO + chr(self.proto))
>>> --> 224         self.save(obj)
>>>  225         self.write(STOP)
>>>  226
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  329
>>>  330         # Save the reduce() output and finally memoize the object
>>>
>>> --> 331         self.save_reduce(obj=obj, *rv)
>>>  332
>>>  333     def persistent_id(self, obj):
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>>> func, args, state, listitems, dictitems, obj)
>>>  399         else:
>>>  400             save(func)
>>> --> 401             save(args)
>>>  402             write(REDUCE)
>>>  403
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  284         f = self.dispatch.get(t)
>>>  285         if f:
>>> --> 286             f(self, obj) # Call unbound method with explicit self
>>>  287             return
>>>  288
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self,
>>> obj)
>>>  560         write(MARK)
>>>  561         for element in obj:
>>> --> 562             save(element)
>>>  563
>>>  564         if id(obj) in memo:
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  284         f = self.dispatch.get(t)
>>>  285         if f:
>>> --> 286             f(self, obj) # Call unbound method with explicit self
>>>  287             return
>>>  288
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self,
>>> obj)
>>>  598
>>>  599         self.memoize(obj)
>>> --> 600         self._batch_appends(iter(obj))
>>>  601
>>>  602     dispatch[ListType] = save_list
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in
>>> _batch_appends(self,
>>> items)
>>>  613         if not self.bin:
>>>  614             for x in items:
>>> --> 615                 save(x)
>>>  616                 write(APPEND)
>>>  617             return
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  329
>>>  330         # Save the reduce() output and finally memoize the object
>>>
>>> --> 331         self.save_reduce(obj=obj, *rv)
>>>  332
>>>  333     def persistent_id(self, obj):
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>>> func, args, state, listitems, dictitems, obj)
>>>  399         else:
>>>  400             save(func)
>>> --> 401             save(args)
>>>  402             write(REDUCE)
>>>  403
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  284         f = self.dispatch.get(t)
>>>  285         if f:
>>> --> 286             f(self, obj) # Call unbound method with explicit self
>>>  287             return
>>>  288
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self,
>>> obj)
>>>  560         write(MARK)
>>>  561         for element in obj:
>>> --> 562             save(element)
>>>  563
>>>  564         if id(obj) in memo:
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  284         f = self.dispatch.get(t)
>>>  285         if f:
>>> --> 286             f(self, obj) # Call unbound method with explicit self
>>>  287             return
>>>  288
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self,
>>> obj)
>>>  598
>>>  599         self.memoize(obj)
>>> --> 600         self._batch_appends(iter(obj))
>>>  601
>>>  602     dispatch[ListType] = save_list
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in
>>> _batch_appends(self,
>>> items)
>>>  613         if not self.bin:
>>>  614             for x in items:
>>> --> 615                 save(x)
>>>  616                 write(APPEND)
>>>  617             return
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  329
>>>  330         # Save the reduce() output and finally memoize the object
>>>
>>> --> 331         self.save_reduce(obj=obj, *rv)
>>>  332
>>>  333     def persistent_id(self, obj):
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>>> func, args, state, listitems, dictitems, obj)
>>>  399         else:
>>>  400             save(func)
>>> --> 401             save(args)
>>>  402             write(REDUCE)
>>>  403
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  284         f = self.dispatch.get(t)
>>>  285         if f:
>>> --> 286             f(self, obj) # Call unbound method with explicit self
>>>  287             return
>>>  288
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self,
>>> obj)
>>>  560         write(MARK)
>>>  561         for element in obj:
>>> --> 562             save(element)
>>>  563
>>>  564         if id(obj) in memo:
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  284         f = self.dispatch.get(t)
>>>  285         if f:
>>> --> 286             f(self, obj) # Call unbound method with explicit self
>>>  287             return
>>>  288
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self,
>>> obj)
>>>  598
>>>  599         self.memoize(obj)
>>> --> 600         self._batch_appends(iter(obj))
>>>  601
>>>  602     dispatch[ListType] = save_list
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in
>>> _batch_appends(self,
>>> items)
>>>  613         if not self.bin:
>>>  614             for x in items:
>>> --> 615                 save(x)
>>>  616                 write(APPEND)
>>>  617             return
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>>  329
>>>  330         # Save the reduce() output and finally memoize the object
>>>
>>> --> 331         self.save_reduce(obj=obj, *rv)
>>>  332
>>>  333     def persistent_id(self, obj):
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>>> func, args, state, listitems, dictitems, obj)
>>>  403
>>>  404         if obj is not None:
>>> --> 405             self.memoize(obj)
>>>  406
>>>  407         # More new special cases (that work with older protocols as
>>>
>>>
>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in memoize(self,
>>> obj)
>>>  242         if self.fast:
>>>  243             return
>>> --> 244         assert id(obj) not in self.memo
>>>  245         memo_len = len(self.memo)
>>>  246         self.write(self.put(memo_len))
>>>
>>> AssertionError:
>>>
>>>
>>>
>>> Matthew Turk wrote:
>>>
>>>>
>>>> Hi Britton, Charles,
>>>>
>>>> Britton is correct, YT has its own routine for pickling.  This is a
>>>> couple step process, because specifically the pickling of parameter
>>>> files is somewhat ill-defined as a problem.  When you initialize a
>>>> parameter file, it's given a (likely) unique hash that corresponds to
>>>> its location, some of its contents, last modified time (or in Enzo's
>>>> case, the CurrentTimeIdentifier in the parameter file) and this is
>>>> stored in ~/.yt/parameter_files.csv.  By this mechanism, one can store
>>>> the pointer to a parameter file that exists on disk with a
>>>> bidirectional hash.  When a new parameter file is opened, it checks to
>>>> see if the path location in the .csv file needs to be updated, and if
>>>> so, it does.
>>>>
>>>> So when pickling a YT object, it stores the minimum set of information
>>>> necessary to recreate that object.  For a sphere, region, etc etc,
>>>> this is really just the necessary arguments to recreate the empty
>>>> object, along with the hash of the parameter file that it belongs to.
>>>> So the actual information necessary to pickle simple objects is very
>>>> small: hash, arguments.  (Called "_con_args" in the source, for
>>>> Constructor Arguments.)  When this is unpickled, the parameter file is
>>>> recreated (or grabbed from in memory, if it has already been
>>>> instantiated) and then the object is recreated.
>>>>
>>>> For more complicated objects, like Clumps and ExtractedSets, we have
>>>> to store more information -- because the constructor arguments are
>>>> much larger and more complicated.  But the basic idea is the same.
>>>>
>>>> When using save_object, the same pickling routine gets called, but
>>>> instead of being loosely affiliated and put into a separate file, it
>>>> gets stored as a dataset in the corresponding .yt file, which is
>>>> itself an HDF5 file.  So the results should be the same, except
>>>> save_object and load_object remove the extra file from the equation.
>>>>
>>>> The error you're seeing above seems to be related to an incompletely
>>>> saved or corrupted file; can you replicate this in a very simple,
>>>> quick fashion?  I would suggest getting a clump that is very, very
>>>> small, taken from a small data region, and seeing if that will pickle
>>>> and unpickle correctly.  If it does, then we know that perhaps the
>>>> previous pickle file was corrupted, or that something is wrong with
>>>> the yt pickling protocol.  If it doesn't work, then we *know*
>>>> something is wrong with the yt pickling protocol.
>>>>
>>>> Best,
>>>>
>>>> Matt
>>>>
>>>> On Thu, May 27, 2010 at 6:43 PM, Britton Smith <brittonsmith at gmail.com>
>>>> wrote:
>>>>
>>>>
>>>>>
>>>>> Hi Chris,
>>>>>
>>>>> YT has its own routine designed to pickle data objects for saving and
>>>>> reloading.  For a given pf, you can do pf.h.save_object(object, name)
>>>>> and
>>>>> the object will be saved to the .yt file associated with the dataset.
>>>>>  You
>>>>> can then get it back with object = pf.h.load_object(name).  I have used
>>>>> this
>>>>> successfully to save the master_clump structure from clump finding.
>>>>>
>>>>> Britton
>>>>>
>>>>> On Thu, May 27, 2010 at 9:26 PM, Charles Hansen
>>>>> <chansen at astro.berkeley.edu>
>>>>> wrote:
>>>>>
>>>>>
>>>>>>
>>>>>> The find_clumps function is working correctly for me I believe, but
>>>>>> the
>>>>>> analysis takes a very long time and I'd like to save the master_clump
>>>>>> object
>>>>>> (from
>>>>>>
>>>>>> http://yt.enzotools.org/doc/cookbook/recipes.html#cookbook-find-clumps)
>>>>>> for
>>>>>> further analysis.  I've tried pickling master_clump.  It dumps
>>>>>> correctly,
>>>>>> but gives an EOF error (below) on loading.  Is there another way to
>>>>>> save
>>>>>> master_clump?
>>>>>>
>>>>>> Thanks,
>>>>>> Charles
>>>>>>
>>>>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load(file)
>>>>>>  1368
>>>>>>  1369 def load(file):
>>>>>> -> 1370     return Unpickler(file).load()
>>>>>>  1371
>>>>>>  1372 def loads(str):
>>>>>>
>>>>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load(self)
>>>>>>  856             while 1:
>>>>>>  857                 key = read(1)
>>>>>> --> 858                 dispatch[key](self)
>>>>>>  859         except _Stop, stopinst:
>>>>>>  860             return stopinst.value
>>>>>>
>>>>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load_eof(self)
>>>>>>  878
>>>>>>  879     def load_eof(self):
>>>>>> --> 880         raise EOFError
>>>>>>  881     dispatch[''] = load_eof
>>>>>>  882
>>>>>>
>>>>>> EOFError:
>>>>>>
>>>>>> _______________________________________________
>>>>>> yt-users mailing list
>>>>>> yt-users at lists.spacepope.org
>>>>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>>>>
>>>>>>
>>>>>
>>>>> _______________________________________________
>>>>> yt-users mailing list
>>>>> yt-users at lists.spacepope.org
>>>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>>>
>>>>>
>>>>>
>>>>>
>>>>
>>>> _______________________________________________
>>>> yt-users mailing list
>>>> yt-users at lists.spacepope.org
>>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>>
>>>>
>>>
>>> _______________________________________________
>>> yt-users mailing list
>>> yt-users at lists.spacepope.org
>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>
>>>
>>
>> _______________________________________________
>> yt-users mailing list
>> yt-users at lists.spacepope.org
>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>
>
> _______________________________________________
> yt-users mailing list
> yt-users at lists.spacepope.org
> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>



More information about the yt-users mailing list