|
From: Ype K. <yk...@xs...> - 2001-01-29 22:47:02
|
"""
stringbuffer.py: mutable string built on java.lang.StringBuffer.
The idea is to save as many string object creations as possible
while still being about as fast as strings, once implemented
in Java. This Python module is intended as a prototype.
The itch:
I needed java.lang.StringBuffer functionality in jython
on the receiving side of a socket.
I grew tired of writing:
buf += sock.recv(maxSize)
which creates a new buf every time
sock.recv(maxSize) is called. Note that sock.recv()
may return as little as it likes, depending on
network traffic conditions.
The alternative of keeping a list of received
strings did not really appeal to me.
The questions:
- This may have been done umpteen times before; if so,
could someone please tell me?
- I added the standard string methods.
Only when a method originally returns a string do I have it put
the result back into the stringbuffer.
Methods that return something else work as if they
were used on a string.
Is this the right approach? Esp. for split() and friends?
- I don't know whether I handled python default arguments
correctly, eg:
def count(self, sub, start = None, end = None):
if start == None:
if end == None:
return str(self).count(sub)
else:
return str(self).count(sub, 0, end)
elif end == None:
return str(self).count(sub, start)
else:
return str(self).count(sub, start, end)
(str(self) should disappear in the java version ...)
- Is an extend() method needed? It is marked as experimental
in my python 2.0 documentation.
- Should slicing be allowed with other steps than 1?
- pop() also works for slices; is that OK?
Aside: in the sort() method this line gives an internal compiler error:
l = [self[i] for i in range(len(self))] # internal compiler error: name i (jython2.0a3)
Why?
"""
"""
See PEP 203 for the methods to emulate the
operators += and *= : __iadd__ and __imul__.
Limitations:
- Slicing should be done with step 1, and no extended slicing is
provided (how should this be done?).
- Many methods still create string objects, by using str(self).
In java charAt() and getChars() can be used instead.
- Sorting has no compare function yet.
Not yet tested:
- insert()
- *=
- lots more: test_string.py could be used, adapting the test function
to look up the result in
the stringbuffer itself where appropriate.
From the Jython faq:
5.1. Java classes that emulate Jython Dictionaries and Sequences
In order to emulate Dictionaries and Sequences, first your Java
class must "extend" the org.python.core.PyObject class.
The following methods can then be defined on your class in order to
emulate these basic Jython types:
public PyObject __finditem__(PyObject key);
public void __setitem__(PyObject key, PyObject value);
public void __delitem__(PyObject key);
Additionally, you might want to throw the
org.python.core.Py.KeyError object if you have any exceptions
(Note, you need not declare the Java method as throwing anything.)
"""
from java.lang import StringBuffer
from java.lang import String
from org.python.core import PySlice
from org.python.core import PyList # sort from PyList doesn't work.
class stringbuffer:
    """Mutable string that wraps a java.lang.StringBuffer (Jython only).

    The wrapped buffer lives in the ``sb`` attribute.  Indexing, slicing,
    item/slice assignment and deletion, ``+``, ``*``, ``+=`` and ``*=``
    operate on it.  Only slices with step 1 are supported.

    String methods that return a string on a plain str (upper(), strip(),
    replace(), ...) store their result back into the buffer and return
    None.  Methods that return something else (count(), find(), split(),
    isdigit(), ...) behave as they do on str(self).

    Many methods still go through str(self) and therefore create a
    temporary string; a Java implementation could use charAt()/getChars()
    instead.
    """

    # ---- basic customization ----

    def __init__(self, s = None):
        """Create a buffer.

        s may be None (empty buffer), another stringbuffer (its contents
        are copied), or anything java.lang.StringBuffer's constructors
        accept: a string gives the initial contents, an int gives the
        initial capacity of an empty buffer.
        """
        if s is None:
            self.sb = StringBuffer()
        elif isinstance(s, stringbuffer):
            # StringBuffer has no constructor for a Python instance;
            # copy via the string value instead.
            self.sb = StringBuffer(str(s))
        else:
            self.sb = StringBuffer(s)

    def dump(self):
        """Print the wrapped buffer's class and the contents between <>."""
        # Single-argument print(...) emits the same text as the original
        # two-item print statement.
        print('%s <%s>' % (self.sb.__class__, str(self)))

    def __repr__(self): # repr(sb)
        return 'stringbuffer(' + repr(str(self)) + ')'

    def __str__(self): # str(sb)
        return self.sb.toString() # java.lang.StringBuffer.toString()

    def __len__(self):
        return self.sb.length()

    def __nonzero__(self):
        return self.sb.length() > 0

    # ---- private helpers ----

    def _index(self, i):
        """Return the non-negative Java index for Python index i.

        Raises IndexError when i is out of range, so that callers (and
        __getitem__-based iteration) see the usual Python exception
        instead of a Java StringIndexOutOfBoundsException.
        """
        n = self.sb.length()
        j = i
        if j < 0:
            j = j + n
        if j < 0 or j >= n:
            raise IndexError("stringbuffer index out of range")
        return j

    def _range_args(self, start, end):
        """Return the optional (start, end) arguments as a tuple.

        None means "not given".  A missing start with a given end becomes
        start 0, exactly as the hand-written four-way ladders did.
        """
        if start is None:
            if end is None:
                return ()
            return (0, end)
        if end is None:
            return (start,)
        return (start, end)

    def _assign(self, s):
        """Replace the wrapped buffer by a new one holding string s."""
        self.sb = StringBuffer(s)

    # ---- sequence type customization ----

    def ji(self, i): # java index
        """Map a possibly negative Python index to a Java index (unchecked)."""
        if i < 0:
            return i + self.sb.length()
        else:
            return i

    def sliceJavaDefaults(self, sl):
        """Return Java (begin, end) offsets for slice object sl.

        Missing bounds default to the whole buffer, negative bounds count
        from the end, and out-of-range bounds are clamped to the buffer
        (as Python slicing does) so the Java calls never see an invalid
        range.  Only step 1 (or an absent step) is supported.
        """
        assert sl.step is None or sl.step == 1
        n = self.sb.length()
        if sl.start is None:
            b = 0
        else:
            b = self.ji(sl.start)
        if sl.stop is None:
            e = n
        else:
            e = self.ji(sl.stop)
        b = max(0, min(b, n))
        e = max(b, min(e, n))
        return b, e

    def __getitem__(self, i): # self[i], key may be slice
        if isinstance(i, PySlice):
            b, e = self.sliceJavaDefaults(i)
            return self.sb.substring(b, e)
        return self.sb.charAt(self._index(i))

    def __setitem__(self, i, c): # self[i] = c, key may be slice
        if isinstance(i, PySlice):
            b, e = self.sliceJavaDefaults(i)
            # str() lets the replacement be a stringbuffer as well.
            self.sb.replace(b, e, str(c))
        else:
            self.sb.setCharAt(self._index(i), c)

    def __delitem__(self, i): # del self[i], key may be slice
        if isinstance(i, PySlice):
            b, e = self.sliceJavaDefaults(i)
            self.sb.delete(b, e)
        else:
            self.sb.deleteCharAt(self._index(i))

    def min(self): return min(str(self))

    def max(self): return max(str(self))

    # ---- `numeric' operators, most can be left to string/String ----

    def __add__(self, s2): # self + s2
        result = stringbuffer(str(self))
        result.append(s2)
        return result

    def __radd__(self, s2): # s2 + self
        result = stringbuffer(str(s2))
        result.append(str(self))
        return result

    def __iadd__(self, s2): # self += s2
        self.append(s2)
        return self

    def __mul__(self, i): # self * i
        return stringbuffer(str(self) * i)

    def __rmul__(self, i): # i * self
        return stringbuffer(str(self) * i)

    def __imul__(self, i): # self *= i
        # see PyString.java for better stuff.
        if i > 1:
            self.append(str(self) * (i - 1))
        elif i < 1:
            # Zero and negative multipliers empty the buffer, as they
            # produce '' for a plain string.
            self.sb.setLength(0)
        return self

    # ---- methods that must be provided for mutable sequences ----

    def append(self, s):
        """Append str(s) to the buffer.

        In Python append() is meant for single elements, but Python has
        no character type, so this accepts anything that can be str()'d.
        (Open question from the author: does this need ensureCapacity()?)
        """
        self.sb.append(str(s))

    def count(self, sub, start = None, end = None):
        return str(self).count(sub, *self._range_args(start, end))

    def index(self, sub, start = None, end = None):
        return str(self).index(sub, *self._range_args(start, end))

    def pop(self, i = None): # also works for slices
        """Remove and return the item (default: the last) or slice i."""
        if i is None:
            i = -1
        # __getitem__ and __delitem__ inlined so the index is mapped once.
        if isinstance(i, PySlice):
            b, e = self.sliceJavaDefaults(i)
            c = self.sb.substring(b, e)
            self.sb.delete(b, e)
        else:
            j = self._index(i)
            c = self.sb.charAt(j)
            self.sb.deleteCharAt(j)
        return c

    def remove(self, c): # remove first occurrence of string c
        i = self.index(c) # raises ValueError
        self.sb.delete(i, i + len(c))

    def sort(self): # sort the chars standard order, no compare function yet
        """Sort the characters in place, via a temporary list."""
        chars = list(str(self))
        chars.sort()
        self._assign(''.join(chars))

    def reverse(self): self.sb.reverse()

    def extend(self, s): self.append(s) # still experimental in python?

    # ---- remaining java StringBuffer methods ----

    def capacity(self): return self.sb.capacity()

    def ensureCapacity(self, cap): return self.sb.ensureCapacity(cap)

    # StringBuffer.insert() is not wrapped: the author considered it
    # superfluous (slice assignment sb[i:i] = s inserts).

    def setLength(self, newLength):
        # truncate or add null chars.
        self.sb.setLength(newLength)

    # ---- python string methods ----
    # Almost all python string methods are also useful for stringbuffers.
    # These are probably better implemented by getting inspiration from
    # PyString.java.  Straightforwardly putting the result back will do
    # in most cases.

    def capitalize(self): self._assign(str(self).capitalize())

    def center(self, width): self._assign(str(self).center(width))

    def endswith(self, suffix, start = None, end = None):
        return str(self).endswith(suffix, *self._range_args(start, end))

    def expandtabs(self, tabsize = None):
        """Expand tabs in place; tabsize None uses the string default."""
        if tabsize is None:
            self._assign(str(self).expandtabs())
        else:
            self._assign(str(self).expandtabs(tabsize))

    def find(self, sub, start = None, end = None):
        return str(self).find(sub, *self._range_args(start, end))

    def isalnum(self): return str(self).isalnum()

    def isalpha(self): return str(self).isalpha()

    def isdigit(self): return str(self).isdigit()

    def islower(self): return str(self).islower()

    def isspace(self): return str(self).isspace()

    def istitle(self): return str(self).istitle()

    def isupper(self): return str(self).isupper()

    def join(self, seq): return str(self).join(seq)

    def ljust(self, width): self._assign(str(self).ljust(width))

    def lower(self): self._assign(str(self).lower())

    def lstrip(self): self._assign(str(self).lstrip())

    def replace(self, old, new, maxsplit = None):
        if maxsplit is None:
            self._assign(str(self).replace(old, new))
        else:
            self._assign(str(self).replace(old, new, maxsplit))

    def rfind(self, sub, start = None, end = None):
        return str(self).rfind(sub, *self._range_args(start, end))

    def rindex(self, sub, start = None, end = None):
        return str(self).rindex(sub, *self._range_args(start, end))

    def rjust(self, width): self._assign(str(self).rjust(width))

    def rstrip(self): self._assign(str(self).rstrip())

    def split(self, sep = None, maxsplit = None):
        """Return a list of strings (not stringbuffers)."""
        if maxsplit is None:
            if sep is None:
                return str(self).split()
            return str(self).split(sep)
        return str(self).split(sep, maxsplit)

    def splitlines(self, keepends = None):
        """Return a list of strings (not stringbuffers)."""
        if keepends is None:
            return str(self).splitlines()
        return str(self).splitlines(keepends)

    def startswith(self, prefix, start = None, end = None):
        return str(self).startswith(prefix, *self._range_args(start, end))

    def strip(self): self._assign(str(self).strip())

    def swapcase(self): self._assign(str(self).swapcase())

    def title(self): self._assign(str(self).title())

    def translate(self, table, deletechars = None):
        if deletechars is None:
            self._assign(str(self).translate(table))
        else:
            self._assign(str(self).translate(table, deletechars))

    def upper(self): self._assign(str(self).upper())
if __name__ == '__main__':
    # Self-test, run only when the module is executed as a script.
    # Each section checks values and, where the operation is in-place,
    # that the object identity is preserved (sb1 is sb2).

    # some initial tests, all empty strings
    sb1 = stringbuffer()
    assert not sb1
    assert len(sb1) == 0
    assert str(sb1) == ''
    assert sb1[:0] == ''
    assert str(sb1 + sb1) == ''
    sb2 = stringbuffer()
    sb2 += sb1
    assert not sb2
    sb2 += ''
    assert len(sb2) == 0
    sb2.append(sb1)
    assert len(sb2) == 0

    # two non empty strings, indexing and slicing, __getitem__
    s1 = 'a'
    s2 = 'bc'
    sb1 = stringbuffer(s1)
    assert len(sb1) == len(s1)
    sb2 = stringbuffer(s2)
    assert len(sb2) == len(s2)
    sb1 += s2
    assert str(sb1) == (s1 + s2)
    assert sb1[:len(s1)] == s1
    assert sb1[len(s1):] == s2
    assert sb1[0] == s1[0]
    assert sb1[1] == s2[0]
    assert sb1[2] == s2[1]
    assert sb1[1:3] == s2
    assert sb1[:] == (s1 + s2)

    # __setitem__
    sb1 = stringbuffer(3)   # an int is passed through to StringBuffer(int)
    sb2 = sb1
    sb1.setLength(3) # now three null chars
    assert sb1[:] == '\0\0\0'
    assert len(sb1) == 3
    sb1[0] = 'a'
    sb1[1] = 'b'
    sb1[2] = 'c'
    assert sb1 is sb2 # check mutability
    assert sb1[:] == 'abc'
    sb1[1] = 'd'
    assert sb1[:] == 'adc'
    assert sb1 is sb2
    sb1[0:2] = 'de'
    assert sb1[:] == 'dec'
    sb1[2:2] = 'f'          # empty slice: pure insertion
    assert sb1[:] == 'defc'

    # __delitem__, single index
    sb1 = stringbuffer('ghij')
    sb2 = sb1
    del sb1[0]
    assert sb1[:] == 'hij'
    assert sb1 is sb2

    # __delitem__, slice
    sb1 = stringbuffer('ghij')
    sb2 = sb1
    del sb1[1:3]
    assert sb1[:] == 'gj'
    assert sb1 is sb2

    # reverse
    sb1 = stringbuffer('ghij')
    sb2 = sb1
    sb1.reverse()
    assert sb1[:] == 'jihg'
    assert sb1 is sb2

    # negative indexes
    assert sb1[-1] == 'g'
    assert sb1[:-1] == 'jih'

    # remove
    sb1.remove('i')
    assert sb1[:] == 'jhg'
    try:
        sb1.remove('a')
    except ValueError:
        pass
    else:
        raise Exception("should throw ValueError")
    assert sb1[:] == 'jhg'

    # sort
    sb1.sort()
    assert sb1[:] == 'ghj'

    # *= (listed as "not yet tested" in the module notes)
    sb1 = stringbuffer('ab')
    sb2 = sb1
    sb1 *= 3
    assert sb1[:] == 'ababab'
    assert sb1 is sb2
|
|
From: <bc...@wo...> - 2001-02-02 13:08:20
|
[Ype Kingma]
>"""
>stringbuffer.py: mutable string built on java.lang.StringBuffer.
>
>The idea is to save as many string object creations as possible
>while still be about as fast as string's, when implemented
>in Java. This python module is intended as a prototype.
>
>
>The itch:
>
>I needed java.lang.StringBuffer functionality in jython
>on the receiving side of a socket.
>I grew tired of writing:
> buf += sock.recv(maxSize)
>which creates a new buf everytime
>sock.recv(maxSize) is called. Note that sock.recv()
>may return as little as it likes, depending on
>network traffic conditions.
>The alternative of keeping a list of received
>strings did not really appeal to me.
>
>
>The questions:
>
>- This may have been done umpteen times before, when so
> could someone please tell me?
There is a UserString.MutableString class, but it has a different
purpose. A module that wraps a StringBuffer could be useful as a
standard module.
>- I added the standard string methods.
> Only when these originally return a string I have them put
> the result back into the stringbuffer.
> Methods that return something else work as if they
> were used on a string.
> Is this the right approach? Esp. for split() and friends?
Hard to say. If you haven't already, you can take a look at which
methods in UserString.UserString return strings and which
return UserStrings.
>- I don't know whether I handled python default arguments
> correctly, eg:
>
> def count(self, sub, start = None, end = None):
> if start == None:
> if end == None:
> return str(self).count(sub)
> else:
> return str(self).count(sub, 0, end)
> elif end == None:
> return str(self).count(sub, start)
> else:
> return str(self).count(sub, start, end)
>
> (str(self) should disappear in the java version ...)
That is too much work. UserString simply does:
    def count(self, sub, start=0, end=sys.maxint):
        return self.data.count(sub, start, end)
Another way to deduce the default values is by looking at the PyString
sources for count(..):
    public int count(String sub) {
        return count(sub, 0, string.length());
    }
    public int count(String sub, int start) {
        return count(sub, start, string.length());
    }
    public int count(String sub, int start, int end) {
        ...
    }
The last count method will validate the start/end indexes so the
sys.maxint trick also works.
>- Is an extend() method needed? It is marked as experimental
> in my python 2.0 documentation.
Hard to say. [].extend() is now used by the standard Python library, so I
think it is safe to say that it will remain. I see no problem with
adding it to stringbuffer and marking it non-experimental.
>- Should slicing be allowed with other steps than 1?
No. I don't see much use for it. After all, strings are not meant as
a general sequence. We have real lists for that.
>- pop() also works for slices, is that ok.?
I don't see much use for that either. It's a minor thing.
>Aside: in the sort() method this line gives an internal compiler error:
> l = [self[i] for i in range(len(self))] # internal compiler error:
>name i (jython2.0a3)
>Why?
Because there was an internal compiler error in 2.0alpha3 <wink>. It was
a bug where fast locals in listcomps were not correctly detected.
OTOH, sorting a stringbuffer is bogus. Cute but mostly useless. I'd
rather avoid adding a sort method.
I modified a copy of CPython's test_userstring.py. You should try it; it
caught some typos. It must be run from CPython's ./Lib/test directory.
#!/usr/bin/env python
import sys, string
from test_support import verbose
import string_tests
# stringbuffer is a wrapper around java.lang.StringBuffer.
# stringbuffer instances should behave similarly to builtin string objects.
# This script was adapted from CPython's test_userstring.py, whose test
# cases were in part derived from 'test_string.py'.
from stringbuffer import stringbuffer
if __name__ == "__main__":
    # Run directly: override the `verbose` flag imported from
    # test_support so only mismatches are printed.
    verbose = 0

# Names of the methods exercised so far; a dict used as a set
# (test() stores methodname -> 1).
tested_methods = {}
def test(methodname, input, *args):
global tested_methods
tested_methods[methodname] = 1
if verbose:
print '%s.%s(%s) ' % (input, methodname, args),
u = stringbuffer(input)
objects = [input, u, stringbuffer(u)]
res = [""] * 3
for i in range(3):
object = objects[i]
try:
f = getattr(object, methodname)
res[i] = apply(f, args)
except:
res[i] = sys.exc_type
if res[0] != res[1]:
if verbose:
print 'no'
print `input`, f, `res[0]`, "<>", `res[1]`
else:
if verbose:
print 'yes'
if res[1] != res[2]:
if verbose:
print 'no'
print `input`, f, `res[1]`, "<>", `res[2]`
else:
if verbose:
print 'yes'
# Hand the comparison callback to string_tests; presumably it invokes
# test() once per string-method case (see CPython's Lib/test/string_tests.py).
string_tests.run_method_tests(test)
regards,
finn
|