Thread: [Assorted-commits] SF.net SVN: assorted: [227] python-commons/trunk/src/commons/seqs.py
                
                Brought to you by:
                
                    yangzhang
                    
                
            
            
        
        
        
    | 
      
      
      From: <yan...@us...> - 2008-01-13 02:30:15
       | 
| Revision: 227
          http://assorted.svn.sourceforge.net/assorted/?rev=227&view=rev
Author:   yangzhang
Date:     2008-01-12 18:29:58 -0800 (Sat, 12 Jan 2008)
Log Message:
-----------
added safe_pickle, write_pickle; made read_pickle (formerly the AFAIK unused read_objs) more generic
Modified Paths:
--------------
    python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py	2008-01-11 04:40:18 UTC (rev 226)
+++ python-commons/trunk/src/commons/seqs.py	2008-01-13 02:29:58 UTC (rev 227)
@@ -1,8 +1,12 @@
 # -*- mode: python; tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4; -*-
 # vim:ft=python:et:sw=4:ts=4
 
+from __future__ import with_statement
+
 from cStringIO import StringIO
-import cPickle as pickle
+from cPickle import *
+from struct import pack, unpack
+from contextlib import closing
 from itertools import ( chain, count, ifilterfalse, islice,
                         izip, tee )
 
@@ -12,38 +16,52 @@
 @var default_chunk_size: The default chunk size used by L{chunkify}.
 """
 
-default_chunk_size = 4096
+default_chunk_size = 8192
 
-def read_objs( s ):
+def read_pickle( read ):
     """
-    Given an input socket, reads in pickled objects from it. This is a
-    generator which yields those objects as they come. I assume that
-    the pickling format is resistant against partial (incomplete)
-    pickles.
+    Given a reader function L{read}, reads in pickled objects from it. I am a
+    generator which yields unpickled objects. I assume that the pickling
+    is "safe," done using L{safe_pickle}.
 
-    @param stream: The input stream.
-    @type stream: stream
+    @param read: The reader function that reads from a stream. It should take
+    a single argument, the number of bytes to consume.
+    @type read: function
+    """
+    with closing( StringIO() ) as stream:
+        obj = None # return this if we hit eof (not enough bytes read)
 
-    @param chunk_size: The size of the chunk (usually the number of
-    bytes to read).
-    @type chunk_size: int
+        def read_until( target ):
+            remain = target - streamlen( stream )
+            if remain > 0:
+                chunk = read( remain )
+                # append to end
+                stream.seek(0,2)
+                stream.write( chunk )
+            return stream.tell() >= target
+
+        if read_until(4):
+            stream.seek(0)
+            (length,) = unpack('i4', stream.read(4))
+            if read_until(length+4):
+                stream.seek(4)
+                obj = load(stream)
+
+        return ( obj, stream.read() )
+
+def safe_pickle( obj ):
     """
-    stream = StringIO()
-    while True:
-        chunk = s.recv( 8192 )
-        if len( chunk ) == 0: break
-        stream.write( chunk )
-        stream.seek( 0 )
-        while True:
-            try: obj = pickle.load( stream )
-            except ( EOFError, pickle.UnpicklingError, ValueError ): break
-            else:
-                yield obj
-                rem = stream.read()
-                stream.seek( 0 )
-                stream.write( rem )
-                stream.truncate()
+    Pickle L{obj} but prepends the serialized length in bytes.
+    """
+    msg = dumps(obj)
+    return pack('i4',len(msg)) + msg
 
+def write_pickle( obj, write ):
+    """
+    Write L{obj} using function L{write}, in a safe, pickle-able fashion.
+    """
+    return write( safe_pickle( obj ) )
+
 def streamlen( stream ):
     """
     Get the length of a stream (e.g. file stream or StringIO).
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 | 
| 
      
      
      From: <yan...@us...> - 2008-01-18 06:55:04
       | 
| Revision: 236
          http://assorted.svn.sourceforge.net/assorted/?rev=236&view=rev
Author:   yangzhang
Date:     2008-01-17 22:55:09 -0800 (Thu, 17 Jan 2008)
Log Message:
-----------
added read_pickles, touched up doc
Modified Paths:
--------------
    python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py	2008-01-17 23:57:23 UTC (rev 235)
+++ python-commons/trunk/src/commons/seqs.py	2008-01-18 06:55:09 UTC (rev 236)
@@ -27,6 +27,11 @@
     @param read: The reader function that reads from a stream. It should take
     a single argument, the number of bytes to consume.
     @type read: function
+
+    @return: A tuple whose first element is the deserialized object or None if
+    EOF was encountered, and whose second element is the remainder bytes until
+    the EOF that were not consumed by unpickling.
+    @rtype: (object, str)
     """
     with closing( StringIO() ) as stream:
         obj = None # return this if we hit eof (not enough bytes read)
@@ -49,6 +54,16 @@
 
         return ( obj, stream.read() )
 
+def read_pickles( read ):
+    """
+    Reads all the consecutively pickled objects from the L{read} function.
+    """
+    while True:
+        pair = ( obj, rem ) = read_pickle( read )
+        if obj is None:
+            break
+        yield pair
+
 def safe_pickle( obj ):
     """
     Pickle L{obj} but prepends the serialized length in bytes.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 | 
| 
      
      
      From: <yan...@us...> - 2008-01-18 09:01:45
       | 
| Revision: 239
          http://assorted.svn.sourceforge.net/assorted/?rev=239&view=rev
Author:   yangzhang
Date:     2008-01-18 01:01:43 -0800 (Fri, 18 Jan 2008)
Log Message:
-----------
updated pickling tools to allow/default to more efficient protocols
Modified Paths:
--------------
    python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py	2008-01-18 06:56:08 UTC (rev 238)
+++ python-commons/trunk/src/commons/seqs.py	2008-01-18 09:01:43 UTC (rev 239)
@@ -18,7 +18,7 @@
 
 default_chunk_size = 8192
 
-def read_pickle( read ):
+def read_pickle( read, init = '', length_thresh = 100000 ):
     """
     Given a reader function L{read}, reads in pickled objects from it. I am a
     generator which yields unpickled objects. I assume that the pickling
@@ -33,26 +33,36 @@
     the EOF that were not consumed by unpickling.
     @rtype: (object, str)
     """
-    with closing( StringIO() ) as stream:
+    with closing( StringIO() ) as sio:
         obj = None # return this if we hit eof (not enough bytes read)
+        sio.write( init )
 
         def read_until( target ):
-            remain = target - streamlen( stream )
+            remain = target - streamlen( sio )
             if remain > 0:
                 chunk = read( remain )
                 # append to end
-                stream.seek(0,2)
-                stream.write( chunk )
-            return stream.tell() >= target
+                sio.seek(0,2)
+                sio.write( chunk )
+            offset = streamlen( sio )
+            sio.seek(0)
+            return offset >= target
 
         if read_until(4):
-            stream.seek(0)
-            (length,) = unpack('i4', stream.read(4))
+            lengthstr = sio.read(4)
+            (length,) = unpack('i4', lengthstr)
+            if length_thresh is not None and length > length_thresh or \
+                    length <= 0:
+                warning( 'read_pickle',
+                         'got length', length,
+                         'streamlen', streamlen(sio),
+                         'first bytes %x %x %x %x' % tuple(map(ord,lengthstr)) )
             if read_until(length+4):
-                stream.seek(4)
-                obj = load(stream)
+                # start reading from right after header
+                sio.seek(4)
+                obj = load(sio)
 
-        return ( obj, stream.read() )
+        return ( obj, sio.read() )
 
 def read_pickles( read ):
     """
@@ -60,16 +70,23 @@
     """
     while True:
         pair = ( obj, rem ) = read_pickle( read )
-        if obj is None:
-            break
+        if obj is None: break
         yield pair
 
-def safe_pickle( obj ):
-    """
-    Pickle L{obj} but prepends the serialized length in bytes.
-    """
-    msg = dumps(obj)
-    return pack('i4',len(msg)) + msg
+class safe_pickler( object ):
+    def __init__( self, protocol = HIGHEST_PROTOCOL ):
+        self.sio = StringIO()
+        self.pickler = Pickler( self.sio, protocol )
+    def dumps( self, obj ):
+        """
+        Pickle L{obj} but prepends the serialized length in bytes.
+        """
+        self.pickler.clear_memo()
+        self.sio.seek(0)
+        self.pickler.dump(obj)
+        self.sio.truncate()
+        msg = self.sio.getvalue()
+        return pack('i4', self.sio.tell()) + msg
 
 def write_pickle( obj, write ):
     """
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 | 
| 
      
      
      From: <yan...@us...> - 2008-01-20 06:25:48
       | 
| Revision: 249
          http://assorted.svn.sourceforge.net/assorted/?rev=249&view=rev
Author:   yangzhang
Date:     2008-01-19 22:25:53 -0800 (Sat, 19 Jan 2008)
Log Message:
-----------
fixed missing import
Modified Paths:
--------------
    python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py	2008-01-20 06:25:30 UTC (rev 248)
+++ python-commons/trunk/src/commons/seqs.py	2008-01-20 06:25:53 UTC (rev 249)
@@ -1,7 +1,7 @@
 # -*- mode: python; tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4; -*-
 # vim:ft=python:et:sw=4:ts=4
 
-from __future__ import with_statement
+from __future__ import ( absolute_import, with_statement )
 
 from cStringIO import StringIO
 from cPickle import *
@@ -9,6 +9,7 @@
 from contextlib import closing
 from itertools import ( chain, count, ifilterfalse, islice,
                         izip, tee )
+from .log import warning
 
 """
 Sequences, streams, and generators.
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 | 
| 
      
      
      From: <yan...@us...> - 2008-04-29 00:31:42
       | 
| Revision: 686
          http://assorted.svn.sourceforge.net/assorted/?rev=686&view=rev
Author:   yangzhang
Date:     2008-04-28 17:31:41 -0700 (Mon, 28 Apr 2008)
Log Message:
-----------
added countstep
Modified Paths:
--------------
    python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py	2008-04-25 21:38:05 UTC (rev 685)
+++ python-commons/trunk/src/commons/seqs.py	2008-04-29 00:31:41 UTC (rev 686)
@@ -341,6 +341,15 @@
             del chunk[ ( i + 1 ) % n : ]
             yield chunk
 
+def countstep(start, step):
+    """
+    Generate [start, start+step, start+2*step, start+3*step, ...].
+    """
+    i = start
+    while True:
+        yield i
+        i += step
+
 def take(n, seq):
     return list(islice(seq, n))
 
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 | 
| 
      
      
      From: <yan...@us...> - 2008-05-08 03:19:13
       | 
| Revision: 707
          http://assorted.svn.sourceforge.net/assorted/?rev=707&view=rev
Author:   yangzhang
Date:     2008-05-07 20:19:12 -0700 (Wed, 07 May 2008)
Log Message:
-----------
fixed missing import
Modified Paths:
--------------
    python-commons/trunk/src/commons/seqs.py
Modified: python-commons/trunk/src/commons/seqs.py
===================================================================
--- python-commons/trunk/src/commons/seqs.py	2008-05-08 03:18:57 UTC (rev 706)
+++ python-commons/trunk/src/commons/seqs.py	2008-05-08 03:19:12 UTC (rev 707)
@@ -8,7 +8,7 @@
 from struct import pack, unpack
 from contextlib import closing
 from itertools import ( chain, count, ifilterfalse, islice,
-                        izip, tee )
+                        izip, repeat, tee )
 from .log import warning
 
 """
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 |