From: Ype K. <yk...@xs...> - 2001-01-29 22:47:02
|
"""
stringbuffer.py: mutable string built on java.lang.StringBuffer.

The idea is to save as many string object creations as possible while still
being about as fast as strings, when implemented in Java.
This python module is intended as a prototype.

The itch:
I needed java.lang.StringBuffer functionality in jython on the receiving
side of a socket. I grew tired of writing:

    buf += sock.recv(maxSize)

which creates a new buf every time sock.recv(maxSize) is called.
Note that sock.recv() may return as little as it likes, depending on
network traffic conditions.
The alternative of keeping a list of received strings did not really
appeal to me.

The questions:
- This may have been done umpteen times before; if so, could someone
  please tell me?
- I added the standard string methods. Only when these originally return
  a string do I have them put the result back into the stringbuffer.
  Methods that return something else work as if they were used on a string.
  Is this the right approach? Esp. for split() and friends?
- I don't know whether I handled python default arguments correctly, e.g.:

    def count(self, sub, start = None, end = None):
        if start == None:
            if end == None: return str(self).count(sub)
            else: return str(self).count(sub, 0, end)
        elif end == None: return str(self).count(sub, start)
        else: return str(self).count(sub, start, end)

  (str(self) should disappear in the java version ...)
- Is an extend() method needed? It is marked as experimental in my
  python 2.0 documentation.
- Should slicing be allowed with other steps than 1?
- pop() also works for slices, is that OK?

Aside: in the sort() method this line gives an internal compiler error:

    l = [self[i] for i in range(len(self))] # internal compiler error: name i

(jython2.0a3) Why?
"""

"""
See PEP 203 for the methods to emulate the operators += and *= :
__iadd__ and __imul__.

Limitations:
- Slicing should be done with step 1, and no extended slicing is provided
  (how should this be done?).
- Many methods still create string objects, by using str(self). In java charAt() and getChars() can be used instead. - Sorting has no compare function yet. Not yet tested: - insert() - *= - lots more: test_string.py could be used, adapting the test function to look up the result in the stringbuffer itself where appropriate. From the Jython faq: 5.1. Java classes that emulate Jython Dictionaries and Sequences In order to emulate Dictionaries and Sequences, first your Java class must "extend" the org.python.core.PyObject class. The following methods can then be defined on your class in order to emulate these basic Jython types: public PyObject __finditem__(PyObject key); public void __setitem__(PyObject key, PyObject value); public void __delitem__(PyObject key); Additionally, you might want to throw the org.python.core.Py.KeyError object if you have any exceptions (Note, you need not declare the Java method as throwing anything.) """ from java.lang import StringBuffer from java.lang import String from org.python.core import PySlice from org.python.core import PyList # sort from PyList doesn't work. 
class stringbuffer:
    """Mutable string prototype backed by a java.lang.StringBuffer.

    The wrapped buffer is kept in ``self.sb``.  String methods that return a
    string on ``str`` store their result back into the buffer and return
    None; methods that return anything else (numbers, lists, booleans)
    behave exactly as they do on the equivalent string.

    Written for Jython 2.0, so it deliberately avoids newer language
    features (list comprehensions tripped a compiler bug in jython2.0a3).
    """

    # ---- basic customization -------------------------------------------

    def __init__(self, s=None):
        """Create an empty buffer, or one initialised from `s`.

        `s` is handed straight to the StringBuffer constructor, so a string
        gives the initial contents while an int gives an initial capacity
        (the self-test below relies on ``stringbuffer(3)``).
        """
        if s is None:
            self.sb = StringBuffer()
        else:
            self.sb = StringBuffer(s)

    def dump(self):
        """Print the class of the wrapped buffer and its contents."""
        # Written as a single parenthesised expression so that the line is
        # valid both as a print statement and as a print() call.
        print('%s <%s>' % (self.sb.__class__, str(self)))

    def __repr__(self):  # repr(sb)
        return 'stringbuffer(' + repr(str(self)) + ')'

    def __str__(self):  # str(sb)
        return self.sb.toString()  # java.lang.StringBuffer.toString()

    def __len__(self):
        return self.sb.length()

    def __nonzero__(self):
        return self.sb.length() > 0

    # ---- private helpers -----------------------------------------------

    def _index(self, i):
        """Return `i` as a valid non-negative index or raise IndexError.

        Raising IndexError (rather than letting the Java bounds exception
        escape) is what lets ``for c in sb`` terminate under the
        __getitem__ iteration protocol.
        """
        n = len(self)
        j = i
        if j < 0:
            j = j + n
        if j < 0 or j >= n:
            raise IndexError("stringbuffer index out of range")
        return j

    def _span(self, start, end):
        """Replace omitted search bounds by the whole-buffer defaults.

        Searching with (0, len) is equivalent to searching without bounds,
        so callers can always use the three-argument string method.
        """
        if start is None:
            start = 0
        if end is None:
            end = len(self)
        return start, end

    def _assign(self, s):
        """Replace the buffer contents with the string `s`."""
        self.sb = StringBuffer(s)

    # ---- sequence type customization -----------------------------------

    def ji(self, i):
        """Map a possibly negative python index to a java index."""
        if i < 0:
            return i + self.sb.length()
        else:
            return i

    def sliceJavaDefaults(self, sl):
        """Return the (begin, end) java offsets described by slice `sl`.

        Omitted bounds default to the start and end of the buffer, negative
        bounds count from the end, and the result is clamped to
        0 <= begin <= end <= len so that it is always acceptable to the
        StringBuffer methods.  Only step 1 (or an omitted step) is
        supported.
        """
        step = sl.step
        assert step is None or step == 1, "only slices with step 1 are supported"
        n = self.sb.length()
        if sl.start is None:
            b = 0
        else:
            b = self.ji(sl.start)
        if sl.stop is None:
            e = n
        else:
            e = self.ji(sl.stop)
        # Python slices never fail on out-of-range bounds; StringBuffer does.
        if b < 0:
            b = 0
        elif b > n:
            b = n
        if e > n:
            e = n
        if e < b:
            e = b
        return b, e

    def __getitem__(self, i):  # self[i], key may be slice
        """Return the character at index `i`, or the substring for a slice."""
        if not isinstance(i, PySlice):
            return self.sb.charAt(self._index(i))
        b, e = self.sliceJavaDefaults(i)
        return self.sb.substring(b, e)

    def __setitem__(self, i, c):  # self[i] = c, key may be slice
        """Overwrite one character, or replace a slice by the string `c`."""
        if not isinstance(i, PySlice):
            self.sb.setCharAt(self._index(i), c)
        else:
            b, e = self.sliceJavaDefaults(i)
            self.sb.replace(b, e, str(c))

    def __delitem__(self, i):  # del self[i], key may be slice
        """Delete one character or a slice of characters."""
        if not isinstance(i, PySlice):
            self.sb.deleteCharAt(self._index(i))
        else:
            b, e = self.sliceJavaDefaults(i)
            self.sb.delete(b, e)

    def min(self):
        return min(str(self))

    def max(self):
        return max(str(self))

    # ---- `numeric' operators, most can be left to string/String --------

    def __add__(self, s2):  # self + s2
        """Return a new stringbuffer holding self followed by `s2`."""
        result = stringbuffer(str(self))
        result.append(s2)
        return result

    def __radd__(self, s2):  # s2 + self
        """Return a new stringbuffer holding `s2` followed by self."""
        result = stringbuffer(str(s2))
        result.append(self)
        return result

    def __iadd__(self, s2):  # self += s2
        self.append(s2)
        return self

    def __mul__(self, i):  # self * i
        return stringbuffer(str(self) * i)

    def __rmul__(self, i):  # i * self
        return stringbuffer(str(self) * i)

    def __imul__(self, i):  # self *= i
        """Repeat the contents in place; `i` < 1 empties the buffer."""
        # see PyString.java for better stuff.
        if i > 1:
            self.append(str(self) * (i - 1))
        elif i < 1:
            # Zero and negative multipliers both give an empty sequence.
            self.sb.setLength(0)
        return self

    # ---- methods that must be provided for mutable sequences -----------

    def append(self, s):
        """Append str(s) to the buffer.

        In python append() is meant for single elements, but python does
        not have a character type, so this accepts anything that can be
        str()'d.  Open question from the author: does append() need an
        ensureCapacity()?
        """
        self.sb.append(str(s))

    def count(self, sub, start=None, end=None):
        """Return the number of occurrences of `sub` in self[start:end]."""
        start, end = self._span(start, end)
        return str(self).count(sub, start, end)

    def index(self, sub, start=None, end=None):
        """Like find(), but raise ValueError when `sub` is not found."""
        start, end = self._span(start, end)
        return str(self).index(sub, start, end)

    def pop(self, i=None):
        """Remove and return the character at `i` (default: the last one).

        Also works for slices, in which case the removed substring is
        returned.  Raises IndexError for an out-of-range index, including
        any index into an empty buffer.
        """
        if i is None:
            i = -1
        # inlined: c = self.__getitem__(i); self.__delitem__(i)
        if not isinstance(i, PySlice):
            jvi = self._index(i)
            c = self.sb.charAt(jvi)
            self.sb.deleteCharAt(jvi)
        else:
            b, e = self.sliceJavaDefaults(i)
            c = self.sb.substring(b, e)
            self.sb.delete(b, e)
        return c

    def remove(self, c):
        """Remove the first occurrence of string `c`; ValueError if absent."""
        i = self.index(c)  # raises ValueError
        self.sb.delete(i, i + len(c))

    def sort(self):
        """Sort the characters in standard order (no compare function yet)."""
        # A detour via the other mutable sequence type.
        chars = list(str(self))
        chars.sort()
        self._assign(''.join(chars))

    def reverse(self):
        self.sb.reverse()

    def extend(self, s):
        self.append(s)  # still experimental in python?

    # ---- remaining java StringBuffer methods ---------------------------

    def capacity(self):
        return self.sb.capacity()

    def ensureCapacity(self, cap):
        return self.sb.ensureCapacity(cap)

    # superfluous: use of StringBuffer.insert()

    def setLength(self, newLength):
        self.sb.setLength(newLength)  # truncate or add null chars.

    # ---- python string methods -----------------------------------------
    # Almost all python string methods are also useful for stringbuffers.
    # These are probably better implemented by getting inspiration from
    # PyString.java.  Straightforward adapting to putting the result back
    # will do in most of the cases...

    def capitalize(self):
        self._assign(str(self).capitalize())

    def center(self, width):
        """Center the contents in place in a field of `width` characters."""
        self._assign(str(self).center(width))

    def endswith(self, suffix, start=None, end=None):
        """Return whether self[start:end] ends with `suffix`."""
        start, end = self._span(start, end)
        return str(self).endswith(suffix, start, end)

    def expandtabs(self, tabsize=8):
        """Expand tabs in place using tab stops every `tabsize` columns."""
        self._assign(str(self).expandtabs(tabsize))

    def find(self, sub, start=None, end=None):
        """Return the lowest index of `sub` in self[start:end], or -1."""
        start, end = self._span(start, end)
        return str(self).find(sub, start, end)

    def isalnum(self):
        return str(self).isalnum()

    def isalpha(self):
        return str(self).isalpha()

    def isdigit(self):
        return str(self).isdigit()

    def islower(self):
        return str(self).islower()

    def isspace(self):
        return str(self).isspace()

    def istitle(self):
        return str(self).istitle()

    def isupper(self):
        return str(self).isupper()

    def join(self, seq):
        """Return the strings in `seq` joined with the buffer contents."""
        return str(self).join(seq)

    def ljust(self, width):
        """Left-justify the contents in place in a field of `width`."""
        self._assign(str(self).ljust(width))

    def lower(self):
        self._assign(str(self).lower())

    def lstrip(self):
        self._assign(str(self).lstrip())

    def replace(self, old, new, maxsplit=None):
        """Replace occurrences of `old` by `new` in place.

        `maxsplit`, when given, limits the number of replacements.
        """
        if maxsplit is None:
            self._assign(str(self).replace(old, new))
        else:
            self._assign(str(self).replace(old, new, maxsplit))

    def rfind(self, sub, start=None, end=None):
        """Return the highest index of `sub` in self[start:end], or -1."""
        start, end = self._span(start, end)
        return str(self).rfind(sub, start, end)

    def rindex(self, sub, start=None, end=None):
        """Like rfind(), but raise ValueError when `sub` is not found."""
        start, end = self._span(start, end)
        return str(self).rindex(sub, start, end)

    def rjust(self, width):
        """Right-justify the contents in place in a field of `width`."""
        self._assign(str(self).rjust(width))

    def rstrip(self):
        self._assign(str(self).rstrip())

    def split(self, sep=None, maxsplit=None):
        """Split like str.split(); returns a list of strings, not
        stringbuffers.  With `sep` omitted, splits on whitespace."""
        s = str(self)
        if maxsplit is not None:
            return s.split(sep, maxsplit)
        if sep is None:
            return s.split()
        return s.split(sep)

    def splitlines(self, keepends=None):
        """Split at line boundaries; returns a list of strings, not
        stringbuffers."""
        if keepends is None:
            return str(self).splitlines()
        return str(self).splitlines(keepends)

    def startswith(self, suffix, start=None, end=None):
        """Return whether self[start:end] starts with the given prefix.

        The parameter keeps its historical name `suffix` for compatibility;
        it is the prefix to test for.
        """
        start, end = self._span(start, end)
        return str(self).startswith(suffix, start, end)

    def strip(self):
        self._assign(str(self).strip())

    def swapcase(self):
        self._assign(str(self).swapcase())

    def title(self):
        self._assign(str(self).title())

    def translate(self, table, deletechars=None):
        """Translate the contents in place through `table`, optionally
        deleting the characters in `deletechars` first."""
        if deletechars is None:
            self._assign(str(self).translate(table))
        else:
            self._assign(str(self).translate(table, deletechars))

    def upper(self):
        self._assign(str(self).upper())


if __name__ == '__main__':
    # some initial tests, all empty strings
    sb1 = stringbuffer()
    assert not sb1
    assert len(sb1) == 0
    assert str(sb1) == ''
    assert sb1[:0] == ''
    assert str(sb1 + sb1) == ''
    sb2 = stringbuffer()
    sb2 += sb1
    assert not sb2
    sb2 += ''
    assert len(sb2) == 0
    sb2.append(sb1)
    assert len(sb2) == 0

    # two non empty strings, indexing and slicing, __getitem__
    s1 = 'a'
    s2 = 'bc'
    sb1 = stringbuffer(s1)
    assert len(sb1) == len(s1)
    sb2 = stringbuffer(s2)
    assert len(sb2) == len(s2)
    sb1 += s2
    assert str(sb1) == (s1 + s2)
    assert sb1[:len(s1)] == s1
    assert sb1[len(s1):] == s2
    assert sb1[0] == s1[0]
    assert sb1[1] == s2[0]
    assert sb1[2] == s2[1]
    assert sb1[1:3] == s2
    assert sb1[:] == (s1 + s2)

    # __setitem__
    sb1 = stringbuffer(3)
    sb2 = sb1
    sb1.setLength(3)  # now three null chars
    assert sb1[:] == '\0\0\0'
    assert len(sb1) == 3
    sb1[0] = 'a'
    sb1[1] = 'b'
    sb1[2] = 'c'
    assert sb1 is sb2  # check mutability
    assert sb1[:] == 'abc'
    sb1[1] = 'd'
    assert sb1[:] == 'adc'
    assert sb1 is sb2
    sb1[0:2] = 'de'
    assert sb1[:] == 'dec'
    sb1[2:2] = 'f'
    assert sb1[:] == 'defc'

    # __delitem__
    sb1 = stringbuffer('ghij')
    sb2 = sb1
    del sb1[0]
    assert sb1[:] == 'hij'
    assert sb1 is sb2
    sb1 = stringbuffer('ghij')
    sb2 = sb1
    del sb1[1:3]
    assert sb1[:] == 'gj'
    assert sb1 is sb2

    # reverse
    sb1 = stringbuffer('ghij')
    sb2 = sb1
    sb1.reverse()
    assert sb1[:] == 'jihg'
    assert sb1 is sb2

    # negative indexes
    assert sb1[-1] == 'g'
    assert sb1[:-1] == 'jih'

    # remove
    sb1.remove('i')
    assert sb1[:] == 'jhg'
    try:
        sb1.remove('a')
    except ValueError:
        pass
    else:
        raise Exception("should throw ValueError")
    assert sb1[:] == 'jhg'

    # sort
    sb1.sort()
    assert sb1[:] == 'ghj'

    # out-of-range index raises IndexError, so iteration terminates
    try:
        sb1[10]
    except IndexError:
        pass
    else:
        raise Exception("should throw IndexError")
    chars = []
    for ch in sb1:
        chars.append(ch)
    assert chars == ['g', 'h', 'j']

    # pop
    assert sb1.pop() == 'j'
    assert sb1.pop(0) == 'g'
    assert sb1[:] == 'h'

    # prefix and suffix tests
    sb1 = stringbuffer('ghij')
    assert sb1.startswith('gh')
    assert sb1.endswith('ij')

    # *=
    sb1 *= 2
    assert sb1[:] == 'ghijghij'
    sb1 *= 0
    assert len(sb1) == 0