[yt-users] pickling clumps

Charles Hansen chansen at astro.berkeley.edu
Fri May 28 11:46:46 PDT 2010


Sorry, the preceding line was:

outfile = open("dummy.pkl", "wb")

The type is...
 
In [73]: type(outfile)
Out[73]: <type 'file'>


Matthew Turk wrote:
> Hi Charles,
>
> What's "outfile" in this case?  Can you send a bit more of your setup
> of the problem, or at least the output of "type(outfile)"?
>
> -Matt
>
> On Fri, May 28, 2010 at 11:32 AM, Charles Hansen
> <chansen at astro.berkeley.edu> wrote:
>   
>> pf.h.save_object does work as advertised for me.  Thanks for the pointer.
>> For what it is worth, clumps do not have the attribute save_object, so
>> masterclump.save_object is not a valid function.
>>
>> I tried pickling again and I can no longer even dump the clumps.  I don't
>> know why the dumping 'worked' before and not now, though I have been trying
>> it on more complicated clumps than before.  I pasted the error message if
>> you are interested.
>>
>> In [6]: pickle.dump(masterclump, outfile)
>> ---------------------------------------------------------------------------
>> AssertionError                            Traceback (most recent call last)
>>
>> /nics/c/home/chansen/lib/yt/src/yt-trunk-svn/scripts/iyt in <module>()
>> ----> 1
>>     2
>>     3
>>     4
>>     5
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in dump(obj, file,
>> protocol)
>>  1360
>>  1361 def dump(obj, file, protocol=None):
>> -> 1362     Pickler(file, protocol).dump(obj)
>>  1363
>>  1364 def dumps(obj, protocol=None):
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in dump(self, obj)
>>   222         if self.proto >= 2:
>>   223             self.write(PROTO + chr(self.proto))
>> --> 224         self.save(obj)
>>   225         self.write(STOP)
>>   226
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   329
>>   330         # Save the reduce() output and finally memoize the object
>>
>> --> 331         self.save_reduce(obj=obj, *rv)
>>   332
>>   333     def persistent_id(self, obj):
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>> func, args, state, listitems, dictitems, obj)
>>   399         else:
>>   400             save(func)
>> --> 401             save(args)
>>   402             write(REDUCE)
>>   403
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   284         f = self.dispatch.get(t)
>>   285         if f:
>> --> 286             f(self, obj) # Call unbound method with explicit self
>>   287             return
>>   288
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self,
>> obj)
>>   560         write(MARK)
>>   561         for element in obj:
>> --> 562             save(element)
>>   563
>>   564         if id(obj) in memo:
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   284         f = self.dispatch.get(t)
>>   285         if f:
>> --> 286             f(self, obj) # Call unbound method with explicit self
>>   287             return
>>   288
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self, obj)
>>   598
>>   599         self.memoize(obj)
>> --> 600         self._batch_appends(iter(obj))
>>   601
>>   602     dispatch[ListType] = save_list
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in _batch_appends(self,
>> items)
>>   613         if not self.bin:
>>   614             for x in items:
>> --> 615                 save(x)
>>   616                 write(APPEND)
>>   617             return
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   329
>>   330         # Save the reduce() output and finally memoize the object
>>
>> --> 331         self.save_reduce(obj=obj, *rv)
>>   332
>>   333     def persistent_id(self, obj):
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>> func, args, state, listitems, dictitems, obj)
>>   399         else:
>>   400             save(func)
>> --> 401             save(args)
>>   402             write(REDUCE)
>>   403
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   284         f = self.dispatch.get(t)
>>   285         if f:
>> --> 286             f(self, obj) # Call unbound method with explicit self
>>   287             return
>>   288
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self,
>> obj)
>>   560         write(MARK)
>>   561         for element in obj:
>> --> 562             save(element)
>>   563
>>   564         if id(obj) in memo:
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   284         f = self.dispatch.get(t)
>>   285         if f:
>> --> 286             f(self, obj) # Call unbound method with explicit self
>>   287             return
>>   288
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self, obj)
>>   598
>>   599         self.memoize(obj)
>> --> 600         self._batch_appends(iter(obj))
>>   601
>>   602     dispatch[ListType] = save_list
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in _batch_appends(self,
>> items)
>>   613         if not self.bin:
>>   614             for x in items:
>> --> 615                 save(x)
>>   616                 write(APPEND)
>>   617             return
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   329
>>   330         # Save the reduce() output and finally memoize the object
>>
>> --> 331         self.save_reduce(obj=obj, *rv)
>>   332
>>   333     def persistent_id(self, obj):
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>> func, args, state, listitems, dictitems, obj)
>>   399         else:
>>   400             save(func)
>> --> 401             save(args)
>>   402             write(REDUCE)
>>   403
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   284         f = self.dispatch.get(t)
>>   285         if f:
>> --> 286             f(self, obj) # Call unbound method with explicit self
>>   287             return
>>   288
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_tuple(self,
>> obj)
>>   560         write(MARK)
>>   561         for element in obj:
>> --> 562             save(element)
>>   563
>>   564         if id(obj) in memo:
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   284         f = self.dispatch.get(t)
>>   285         if f:
>> --> 286             f(self, obj) # Call unbound method with explicit self
>>   287             return
>>   288
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_list(self, obj)
>>   598
>>   599         self.memoize(obj)
>> --> 600         self._batch_appends(iter(obj))
>>   601
>>   602     dispatch[ListType] = save_list
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in _batch_appends(self,
>> items)
>>   613         if not self.bin:
>>   614             for x in items:
>> --> 615                 save(x)
>>   616                 write(APPEND)
>>   617             return
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save(self, obj)
>>   329
>>   330         # Save the reduce() output and finally memoize the object
>>
>> --> 331         self.save_reduce(obj=obj, *rv)
>>   332
>>   333     def persistent_id(self, obj):
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in save_reduce(self,
>> func, args, state, listitems, dictitems, obj)
>>   403
>>   404         if obj is not None:
>> --> 405             self.memoize(obj)
>>   406
>>   407         # More new special cases (that work with older protocols as
>>
>>
>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in memoize(self, obj)
>>   242         if self.fast:
>>   243             return
>> --> 244         assert id(obj) not in self.memo
>>   245         memo_len = len(self.memo)
>>   246         self.write(self.put(memo_len))
>>
>> AssertionError:
>>
>>
>>
>> Matthew Turk wrote:
>>     
>>> Hi Britton, Charles,
>>>
>>> Britton is correct, YT has its own routine for pickling.  This is a
>>> couple step process, because specifically the pickling of parameter
>>> files is somewhat ill-defined as a problem.  When you initialize a
>>> parameter file, it's given a (likely) unique hash that corresponds to
>>> its location, some of its contents, last modified time (or in Enzo's
>>> case, the CurrentTimeIdentifier in the parameter file) and this is
>>> stored in ~/.yt/parameter_files.csv.  By this mechanism, one can store
>>> the pointer to a parameter file that exists on disk with a
>>> bidirectional hash.  When a new parameter file is opened, it checks to
>>> see if the path location in the .csv file needs to be updated, and if
>>> so, it does.
>>>
>>> So when pickling a YT object, it stores the minimum set of information
>>> necessary to recreate that object.  For a sphere, region, etc etc,
>>> this is really just the necessary arguments to recreate the empty
>>> object, along with the hash of the parameter file that it belongs to.
>>> So the actual information necessary to pickle simple objects is very
>>> small: hash, arguments.  (Called "_con_args" in the source, for
>>> Constructor Arguments.)  When this is unpickled, the parameter file is
>>> recreated (or grabbed from in memory, if it has already been
>>> instantiated) and then the object is recreated.
>>>
>>> For more complicated objects, like Clumps and ExtractedSets, we have
>>> to store more information -- because the constructor arguments are
>>> much larger and more complicated.  But the basic idea is the same.
>>>
>>> When using save_object, the same pickling routine gets called, but
>>> instead of being loosely affiliated and put into a separate file, it
>>> gets stored as a dataset in the corresponding .yt file, which is
>>> itself an HDF5 file.  So the results should be the same, except
>>> save_object and load_object remove the extra file from the equation.
>>>
>>> The error you're seeing above seems to be related to an incompletely
>>> saved or corrupted file; can you replicate this in a very simple,
>>> quick fashion?  I would suggest getting a clump that is very, very
>>> small, taken from a small data region, and seeing if that will pickle
>>> and unpickle correctly.  If it does, then we know that perhaps the
>>> previous pickle file was corrupted, or that something is wrong with
>>> the yt pickling protocol.  If it doesn't work, then we *know*
>>> something is wrong with the yt pickling protocol.
>>>
>>> Best,
>>>
>>> Matt
>>>
>>> On Thu, May 27, 2010 at 6:43 PM, Britton Smith <brittonsmith at gmail.com>
>>> wrote:
>>>
>>>       
>>>> Hi Chris,
>>>>
>>>> YT has its own routine designed to pickle data objects for saving and
>>>> reloading.  For a given pf, you can do pf.h.save_object(object, name) and
>>>> the object will be saved to the .yt file associated with the dataset.  You
>>>> can then get it back with object = pf.h.load_object(name).  I have used this
>>>> successfully to save the master_clump structure from clump finding.
>>>>
>>>> Britton
>>>>
>>>> On Thu, May 27, 2010 at 9:26 PM, Charles Hansen
>>>> <chansen at astro.berkeley.edu>
>>>> wrote:
>>>>
>>>>         
>>>>> The find_clumps function is working correctly for me, I believe, but the
>>>>> analysis takes a very long time and I'd like to save the master_clump object
>>>>> (from http://yt.enzotools.org/doc/cookbook/recipes.html#cookbook-find-clumps)
>>>>> for further analysis.  I've tried pickling master_clump.  It dumps correctly,
>>>>> but gives an EOF error (below) on loading.  Is there another way to save
>>>>> master_clump?
>>>>>
>>>>> Thanks,
>>>>> Charles
>>>>>
>>>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load(file)
>>>>>  1368
>>>>>  1369 def load(file):
>>>>> -> 1370     return Unpickler(file).load()
>>>>>  1371
>>>>>  1372 def loads(str):
>>>>>
>>>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load(self)
>>>>>   856             while 1:
>>>>>   857                 key = read(1)
>>>>> --> 858                 dispatch[key](self)
>>>>>   859         except _Stop, stopinst:
>>>>>   860             return stopinst.value
>>>>>
>>>>> /nics/c/home/chansen/lib/yt/lib/python2.6/pickle.pyc in load_eof(self)
>>>>>   878
>>>>>   879     def load_eof(self):
>>>>> --> 880         raise EOFError
>>>>>   881     dispatch[''] = load_eof
>>>>>   882
>>>>>
>>>>> EOFError:
>>>>>
>>>>> _______________________________________________
>>>>> yt-users mailing list
>>>>> yt-users at lists.spacepope.org
>>>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>>>
>>>>>           
>>>> _______________________________________________
>>>> yt-users mailing list
>>>> yt-users at lists.spacepope.org
>>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>>
>>>>
>>>>
>>>>         
>>> _______________________________________________
>>> yt-users mailing list
>>> yt-users at lists.spacepope.org
>>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>>
>>>       
>> _______________________________________________
>> yt-users mailing list
>> yt-users at lists.spacepope.org
>> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>>
>>     
> _______________________________________________
> yt-users mailing list
> yt-users at lists.spacepope.org
> http://lists.spacepope.org/listinfo.cgi/yt-users-spacepope.org
>   




More information about the yt-users mailing list