Thread: [Assorted-commits] SF.net SVN: assorted: [227] python-commons/trunk/src/commons/seqs.py
Brought to you by:
yangzhang
|
From: <yan...@us...> - 2008-01-13 02:30:15
|
Revision: 227
http://assorted.svn.sourceforge.net/assorted/?rev=227&view=rev
Author: yangzhang
Date: 2008-01-12 18:29:58 -0800 (Sat, 12 Jan 2008)
Log Message:
-----------
added safe_pickle, write_pickle; made read_pickle (formerly the AFAIK unused read_objs) more generic
Modified Paths:
--------------
python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py 2008-01-11 04:40:18 UTC (rev 226)
+++ python-commons/trunk/src/commons/seqs.py 2008-01-13 02:29:58 UTC (rev 227)
@@ -1,8 +1,12 @@
# -*- mode: python; tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4; -*-
# vim:ft=python:et:sw=4:ts=4
+from __future__ import with_statement
+
from cStringIO import StringIO
-import cPickle as pickle
+from cPickle import *
+from struct import pack, unpack
+from contextlib import closing
from itertools import ( chain, count, ifilterfalse, islice,
izip, tee )
@@ -12,38 +16,52 @@
@var default_chunk_size: The default chunk size used by L{chunkify}.
"""
-default_chunk_size = 4096
+default_chunk_size = 8192
-def read_objs( s ):
+def read_pickle( read ):
"""
- Given an input socket, reads in pickled objects from it. This is a
- generator which yields those objects as they come. I assume that
- the pickling format is resistant against partial (incomplete)
- pickles.
+ Given a reader function L{read}, reads in pickled objects from it. I am a
+ generator which yields unpickled objects. I assume that the pickling
+ is "safe," done using L{safe_pickle}.
- @param stream: The input stream.
- @type stream: stream
+ @param read: The reader function that reads from a stream. It should take
+ a single argument, the number of bytes to consume.
+ @type read: function
+ """
+ with closing( StringIO() ) as stream:
+ obj = None # return this if we hit eof (not enough bytes read)
- @param chunk_size: The size of the chunk (usually the number of
- bytes to read).
- @type chunk_size: int
+ def read_until( target ):
+ remain = target - streamlen( stream )
+ if remain > 0:
+ chunk = read( remain )
+ # append to end
+ stream.seek(0,2)
+ stream.write( chunk )
+ return stream.tell() >= target
+
+ if read_until(4):
+ stream.seek(0)
+ (length,) = unpack('i4', stream.read(4))
+ if read_until(length+4):
+ stream.seek(4)
+ obj = load(stream)
+
+ return ( obj, stream.read() )
+
+def safe_pickle( obj ):
"""
- stream = StringIO()
- while True:
- chunk = s.recv( 8192 )
- if len( chunk ) == 0: break
- stream.write( chunk )
- stream.seek( 0 )
- while True:
- try: obj = pickle.load( stream )
- except ( EOFError, pickle.UnpicklingError, ValueError ): break
- else:
- yield obj
- rem = stream.read()
- stream.seek( 0 )
- stream.write( rem )
- stream.truncate()
+ Pickle L{obj} but prepends the serialized length in bytes.
+ """
+ msg = dumps(obj)
+ return pack('i4',len(msg)) + msg
+def write_pickle( obj, write ):
+ """
+ Write L{obj} using function L{write}, in a safe, pickle-able fashion.
+ """
+ return write( safe_pickle( obj ) )
+
def streamlen( stream ):
"""
Get the length of a stream (e.g. file stream or StringIO).
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <yan...@us...> - 2008-01-18 06:55:04
|
Revision: 236
http://assorted.svn.sourceforge.net/assorted/?rev=236&view=rev
Author: yangzhang
Date: 2008-01-17 22:55:09 -0800 (Thu, 17 Jan 2008)
Log Message:
-----------
added read_pickles, touched up doc
Modified Paths:
--------------
python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py 2008-01-17 23:57:23 UTC (rev 235)
+++ python-commons/trunk/src/commons/seqs.py 2008-01-18 06:55:09 UTC (rev 236)
@@ -27,6 +27,11 @@
@param read: The reader function that reads from a stream. It should take
a single argument, the number of bytes to consume.
@type read: function
+
+ @return: A tuple whose first element is the deserialized object or None if
+ EOF was encountered, and whose second element is the remainder bytes until
+ the EOF that were not consumed by unpickling.
+ @rtype: (object, str)
"""
with closing( StringIO() ) as stream:
obj = None # return this if we hit eof (not enough bytes read)
@@ -49,6 +54,16 @@
return ( obj, stream.read() )
+def read_pickles( read ):
+ """
+ Reads all the consecutively pickled objects from the L{read} function.
+ """
+ while True:
+ pair = ( obj, rem ) = read_pickle( read )
+ if obj is None:
+ break
+ yield pair
+
def safe_pickle( obj ):
"""
Pickle L{obj} but prepends the serialized length in bytes.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <yan...@us...> - 2008-01-18 09:01:45
|
Revision: 239
http://assorted.svn.sourceforge.net/assorted/?rev=239&view=rev
Author: yangzhang
Date: 2008-01-18 01:01:43 -0800 (Fri, 18 Jan 2008)
Log Message:
-----------
updated pickling tools to allow/default to more efficient protocols
Modified Paths:
--------------
python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py 2008-01-18 06:56:08 UTC (rev 238)
+++ python-commons/trunk/src/commons/seqs.py 2008-01-18 09:01:43 UTC (rev 239)
@@ -18,7 +18,7 @@
default_chunk_size = 8192
-def read_pickle( read ):
+def read_pickle( read, init = '', length_thresh = 100000 ):
"""
Given a reader function L{read}, reads in pickled objects from it. I am a
generator which yields unpickled objects. I assume that the pickling
@@ -33,26 +33,36 @@
the EOF that were not consumed by unpickling.
@rtype: (object, str)
"""
- with closing( StringIO() ) as stream:
+ with closing( StringIO() ) as sio:
obj = None # return this if we hit eof (not enough bytes read)
+ sio.write( init )
def read_until( target ):
- remain = target - streamlen( stream )
+ remain = target - streamlen( sio )
if remain > 0:
chunk = read( remain )
# append to end
- stream.seek(0,2)
- stream.write( chunk )
- return stream.tell() >= target
+ sio.seek(0,2)
+ sio.write( chunk )
+ offset = streamlen( sio )
+ sio.seek(0)
+ return offset >= target
if read_until(4):
- stream.seek(0)
- (length,) = unpack('i4', stream.read(4))
+ lengthstr = sio.read(4)
+ (length,) = unpack('i4', lengthstr)
+ if length_thresh is not None and length > length_thresh or \
+ length <= 0:
+ warning( 'read_pickle',
+ 'got length', length,
+ 'streamlen', streamlen(sio),
+ 'first bytes %x %x %x %x' % tuple(map(ord,lengthstr)) )
if read_until(length+4):
- stream.seek(4)
- obj = load(stream)
+ # start reading from right after header
+ sio.seek(4)
+ obj = load(sio)
- return ( obj, stream.read() )
+ return ( obj, sio.read() )
def read_pickles( read ):
"""
@@ -60,16 +70,23 @@
"""
while True:
pair = ( obj, rem ) = read_pickle( read )
- if obj is None:
- break
+ if obj is None: break
yield pair
-def safe_pickle( obj ):
- """
- Pickle L{obj} but prepends the serialized length in bytes.
- """
- msg = dumps(obj)
- return pack('i4',len(msg)) + msg
+class safe_pickler( object ):
+ def __init__( self, protocol = HIGHEST_PROTOCOL ):
+ self.sio = StringIO()
+ self.pickler = Pickler( self.sio, protocol )
+ def dumps( self, obj ):
+ """
+ Pickle L{obj} but prepends the serialized length in bytes.
+ """
+ self.pickler.clear_memo()
+ self.sio.seek(0)
+ self.pickler.dump(obj)
+ self.sio.truncate()
+ msg = self.sio.getvalue()
+ return pack('i4', self.sio.tell()) + msg
def write_pickle( obj, write ):
"""
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <yan...@us...> - 2008-01-20 06:25:48
|
Revision: 249
http://assorted.svn.sourceforge.net/assorted/?rev=249&view=rev
Author: yangzhang
Date: 2008-01-19 22:25:53 -0800 (Sat, 19 Jan 2008)
Log Message:
-----------
fixed missing import
Modified Paths:
--------------
python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py 2008-01-20 06:25:30 UTC (rev 248)
+++ python-commons/trunk/src/commons/seqs.py 2008-01-20 06:25:53 UTC (rev 249)
@@ -1,7 +1,7 @@
# -*- mode: python; tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4; -*-
# vim:ft=python:et:sw=4:ts=4
-from __future__ import with_statement
+from __future__ import ( absolute_import, with_statement )
from cStringIO import StringIO
from cPickle import *
@@ -9,6 +9,7 @@
from contextlib import closing
from itertools import ( chain, count, ifilterfalse, islice,
izip, tee )
+from .log import warning
"""
Sequences, streams, and generators.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <yan...@us...> - 2008-04-29 00:31:42
|
Revision: 686
http://assorted.svn.sourceforge.net/assorted/?rev=686&view=rev
Author: yangzhang
Date: 2008-04-28 17:31:41 -0700 (Mon, 28 Apr 2008)
Log Message:
-----------
added countstep
Modified Paths:
--------------
python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py 2008-04-25 21:38:05 UTC (rev 685)
+++ python-commons/trunk/src/commons/seqs.py 2008-04-29 00:31:41 UTC (rev 686)
@@ -341,6 +341,15 @@
del chunk[ ( i + 1 ) % n : ]
yield chunk
+def countstep(start, step):
+ """
+ Generate [start, start+step, start+2*step, start+3*step, ...].
+ """
+ i = start
+ while True:
+ yield i
+ i += step
+
def take(n, seq):
return list(islice(seq, n))
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <yan...@us...> - 2008-05-08 03:19:13
|
Revision: 707
http://assorted.svn.sourceforge.net/assorted/?rev=707&view=rev
Author: yangzhang
Date: 2008-05-07 20:19:12 -0700 (Wed, 07 May 2008)
Log Message:
-----------
fixed missing import
Modified Paths:
--------------
python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py 2008-05-08 03:18:57 UTC (rev 706)
+++ python-commons/trunk/src/commons/seqs.py 2008-05-08 03:19:12 UTC (rev 707)
@@ -8,7 +8,7 @@
from struct import pack, unpack
from contextlib import closing
from itertools import ( chain, count, ifilterfalse, islice,
- izip, tee )
+ izip, repeat, tee )
from .log import warning
"""
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|