Thread: [Assorted-commits] SF.net SVN: assorted: [227] python-commons/trunk/src/commons/seqs.py
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-01-13 02:30:15
|
Revision: 227 http://assorted.svn.sourceforge.net/assorted/?rev=227&view=rev Author: yangzhang Date: 2008-01-12 18:29:58 -0800 (Sat, 12 Jan 2008) Log Message: ----------- added safe_pickle, write_pickle; made read_pickle (formerly the AFAIK unused read_objs) more generic Modified Paths: -------------- python-commons/trunk/src/commons/seqs.py Modified: python-commons/trunk/src/commons/seqs.py =================================================================== --- python-commons/trunk/src/commons/seqs.py 2008-01-11 04:40:18 UTC (rev 226) +++ python-commons/trunk/src/commons/seqs.py 2008-01-13 02:29:58 UTC (rev 227) @@ -1,8 +1,12 @@ # -*- mode: python; tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4; -*- # vim:ft=python:et:sw=4:ts=4 +from __future__ import with_statement + from cStringIO import StringIO -import cPickle as pickle +from cPickle import * +from struct import pack, unpack +from contextlib import closing from itertools import ( chain, count, ifilterfalse, islice, izip, tee ) @@ -12,38 +16,52 @@ @var default_chunk_size: The default chunk size used by L{chunkify}. """ -default_chunk_size = 4096 +default_chunk_size = 8192 -def read_objs( s ): +def read_pickle( read ): """ - Given an input socket, reads in pickled objects from it. This is a - generator which yields those objects as they come. I assume that - the pickling format is resistant against partial (incomplete) - pickles. + Given a reader function L{read}, reads in pickled objects from it. I am a + generator which yields unpickled objects. I assume that the pickling + is "safe," done using L{safe_pickle}. - @param stream: The input stream. - @type stream: stream + @param read: The reader function that reads from a stream. It should take + a single argument, the number of bytes to consume. 
+ @type read: function + """ + with closing( StringIO() ) as stream: + obj = None # return this if we hit eof (not enough bytes read) - @param chunk_size: The size of the chunk (usually the number of - bytes to read). - @type chunk_size: int + def read_until( target ): + remain = target - streamlen( stream ) + if remain > 0: + chunk = read( remain ) + # append to end + stream.seek(0,2) + stream.write( chunk ) + return stream.tell() >= target + + if read_until(4): + stream.seek(0) + (length,) = unpack('i4', stream.read(4)) + if read_until(length+4): + stream.seek(4) + obj = load(stream) + + return ( obj, stream.read() ) + +def safe_pickle( obj ): """ - stream = StringIO() - while True: - chunk = s.recv( 8192 ) - if len( chunk ) == 0: break - stream.write( chunk ) - stream.seek( 0 ) - while True: - try: obj = pickle.load( stream ) - except ( EOFError, pickle.UnpicklingError, ValueError ): break - else: - yield obj - rem = stream.read() - stream.seek( 0 ) - stream.write( rem ) - stream.truncate() + Pickle L{obj} but prepends the serialized length in bytes. + """ + msg = dumps(obj) + return pack('i4',len(msg)) + msg +def write_pickle( obj, write ): + """ + Write L{obj} using function L{write}, in a safe, pickle-able fashion. + """ + return write( safe_pickle( obj ) ) + def streamlen( stream ): """ Get the length of a stream (e.g. file stream or StringIO). This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-01-18 06:55:04
|
Revision: 236 http://assorted.svn.sourceforge.net/assorted/?rev=236&view=rev Author: yangzhang Date: 2008-01-17 22:55:09 -0800 (Thu, 17 Jan 2008) Log Message: ----------- added read_pickles, touched up doc Modified Paths: -------------- python-commons/trunk/src/commons/seqs.py Modified: python-commons/trunk/src/commons/seqs.py =================================================================== --- python-commons/trunk/src/commons/seqs.py 2008-01-17 23:57:23 UTC (rev 235) +++ python-commons/trunk/src/commons/seqs.py 2008-01-18 06:55:09 UTC (rev 236) @@ -27,6 +27,11 @@ @param read: The reader function that reads from a stream. It should take a single argument, the number of bytes to consume. @type read: function + + @return: A tuple whose first element is the deserialized object or None if + EOF was encountered, and whose second element is the remainder bytes until + the EOF that were not consumed by unpickling. + @rtype: (object, str) """ with closing( StringIO() ) as stream: obj = None # return this if we hit eof (not enough bytes read) @@ -49,6 +54,16 @@ return ( obj, stream.read() ) +def read_pickles( read ): + """ + Reads all the consecutively pickled objects from the L{read} function. + """ + while True: + pair = ( obj, rem ) = read_pickle( read ) + if obj is None: + break + yield pair + def safe_pickle( obj ): """ Pickle L{obj} but prepends the serialized length in bytes. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-01-18 09:01:45
|
Revision: 239 http://assorted.svn.sourceforge.net/assorted/?rev=239&view=rev Author: yangzhang Date: 2008-01-18 01:01:43 -0800 (Fri, 18 Jan 2008) Log Message: ----------- updated pickling tools to allow/default to more efficient protocols Modified Paths: -------------- python-commons/trunk/src/commons/seqs.py Modified: python-commons/trunk/src/commons/seqs.py =================================================================== --- python-commons/trunk/src/commons/seqs.py 2008-01-18 06:56:08 UTC (rev 238) +++ python-commons/trunk/src/commons/seqs.py 2008-01-18 09:01:43 UTC (rev 239) @@ -18,7 +18,7 @@ default_chunk_size = 8192 -def read_pickle( read ): +def read_pickle( read, init = '', length_thresh = 100000 ): """ Given a reader function L{read}, reads in pickled objects from it. I am a generator which yields unpickled objects. I assume that the pickling @@ -33,26 +33,36 @@ the EOF that were not consumed by unpickling. @rtype: (object, str) """ - with closing( StringIO() ) as stream: + with closing( StringIO() ) as sio: obj = None # return this if we hit eof (not enough bytes read) + sio.write( init ) def read_until( target ): - remain = target - streamlen( stream ) + remain = target - streamlen( sio ) if remain > 0: chunk = read( remain ) # append to end - stream.seek(0,2) - stream.write( chunk ) - return stream.tell() >= target + sio.seek(0,2) + sio.write( chunk ) + offset = streamlen( sio ) + sio.seek(0) + return offset >= target if read_until(4): - stream.seek(0) - (length,) = unpack('i4', stream.read(4)) + lengthstr = sio.read(4) + (length,) = unpack('i4', lengthstr) + if length_thresh is not None and length > length_thresh or \ + length <= 0: + warning( 'read_pickle', + 'got length', length, + 'streamlen', streamlen(sio), + 'first bytes %x %x %x %x' % tuple(map(ord,lengthstr)) ) if read_until(length+4): - stream.seek(4) - obj = load(stream) + # start reading from right after header + sio.seek(4) + obj = load(sio) - return ( obj, stream.read() ) + return ( obj, 
sio.read() ) def read_pickles( read ): """ @@ -60,16 +70,23 @@ """ while True: pair = ( obj, rem ) = read_pickle( read ) - if obj is None: - break + if obj is None: break yield pair -def safe_pickle( obj ): - """ - Pickle L{obj} but prepends the serialized length in bytes. - """ - msg = dumps(obj) - return pack('i4',len(msg)) + msg +class safe_pickler( object ): + def __init__( self, protocol = HIGHEST_PROTOCOL ): + self.sio = StringIO() + self.pickler = Pickler( self.sio, protocol ) + def dumps( self, obj ): + """ + Pickle L{obj} but prepends the serialized length in bytes. + """ + self.pickler.clear_memo() + self.sio.seek(0) + self.pickler.dump(obj) + self.sio.truncate() + msg = self.sio.getvalue() + return pack('i4', self.sio.tell()) + msg def write_pickle( obj, write ): """ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-01-20 06:25:48
|
Revision: 249 http://assorted.svn.sourceforge.net/assorted/?rev=249&view=rev Author: yangzhang Date: 2008-01-19 22:25:53 -0800 (Sat, 19 Jan 2008) Log Message: ----------- fixed missing import Modified Paths: -------------- python-commons/trunk/src/commons/seqs.py Modified: python-commons/trunk/src/commons/seqs.py =================================================================== --- python-commons/trunk/src/commons/seqs.py 2008-01-20 06:25:30 UTC (rev 248) +++ python-commons/trunk/src/commons/seqs.py 2008-01-20 06:25:53 UTC (rev 249) @@ -1,7 +1,7 @@ # -*- mode: python; tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4; -*- # vim:ft=python:et:sw=4:ts=4 -from __future__ import with_statement +from __future__ import ( absolute_import, with_statement ) from cStringIO import StringIO from cPickle import * @@ -9,6 +9,7 @@ from contextlib import closing from itertools import ( chain, count, ifilterfalse, islice, izip, tee ) +from .log import warning """ Sequences, streams, and generators. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-04-29 00:31:42
|
Revision: 686 http://assorted.svn.sourceforge.net/assorted/?rev=686&view=rev Author: yangzhang Date: 2008-04-28 17:31:41 -0700 (Mon, 28 Apr 2008) Log Message: ----------- added countstep Modified Paths: -------------- python-commons/trunk/src/commons/seqs.py Modified: python-commons/trunk/src/commons/seqs.py =================================================================== --- python-commons/trunk/src/commons/seqs.py 2008-04-25 21:38:05 UTC (rev 685) +++ python-commons/trunk/src/commons/seqs.py 2008-04-29 00:31:41 UTC (rev 686) @@ -341,6 +341,15 @@ del chunk[ ( i + 1 ) % n : ] yield chunk +def countstep(start, step): + """ + Generate [start, start+step, start+2*step, start+3*step, ...]. + """ + i = start + while True: + yield i + i += step + def take(n, seq): return list(islice(seq, n)) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-05-08 03:19:13
|
Revision: 707 http://assorted.svn.sourceforge.net/assorted/?rev=707&view=rev Author: yangzhang Date: 2008-05-07 20:19:12 -0700 (Wed, 07 May 2008) Log Message: ----------- fixed missing import Modified Paths: -------------- python-commons/trunk/src/commons/seqs.py Modified: python-commons/trunk/src/commons/seqs.py =================================================================== --- python-commons/trunk/src/commons/seqs.py 2008-05-08 03:18:57 UTC (rev 706) +++ python-commons/trunk/src/commons/seqs.py 2008-05-08 03:19:12 UTC (rev 707) @@ -8,7 +8,7 @@ from struct import pack, unpack from contextlib import closing from itertools import ( chain, count, ifilterfalse, islice, - izip, tee ) + izip, repeat, tee ) from .log import warning """ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |