From: <zy...@us...> - 2009-03-22 14:00:56
|
Revision: 6107 http://jython.svn.sourceforge.net/jython/?rev=6107&view=rev Author: zyasoft Date: 2009-03-22 14:00:52 +0000 (Sun, 22 Mar 2009) Log Message: ----------- Fixes #1140: now uses pkgutil.get_loader.get_data to properly get the data files used by unicodedata. Data files were not being imported properly, as required for a standalone jar. Thanks Olivier Samyn for the initial version of the patch! Modified Paths: -------------- trunk/jython/Lib/unicodedata.py Modified: trunk/jython/Lib/unicodedata.py =================================================================== --- trunk/jython/Lib/unicodedata.py 2009-03-22 03:00:51 UTC (rev 6106) +++ trunk/jython/Lib/unicodedata.py 2009-03-22 14:00:52 UTC (rev 6107) @@ -1,19 +1,18 @@ -from __future__ import with_statement from bisect import bisect_left import operator -import os import java.lang.Character -# this is intended as a stopgap measure; at the very least it should -# be refactored so that we can avoid its slow startup time +# XXX - this is intended as a stopgap measure until 2.5.1, which will have a Java implementation # requires java 6 for `normalize` function +# only has one version of the database +# does not normalized ideographs -# XXX - change so we bind against the specific version _codepoints = {} _eaw = {} _names = {} _segments = [] _eaw_segments = [] +Nonesuch = object() def get_int(col): try: @@ -35,63 +34,53 @@ except: return None -def init(path): - with open(os.path.join(path, 'UnicodeData.txt')) as data: - for row in data: - cols = row.split(';') - codepoint = int(cols[0], 16) - name = cols[1] - if name == '<CJK Ideograph, Last>': - lookup_name = 'CJK UNIFIED IDEOGRAPH' - else: - lookup_name = name - data = ( - cols[2], - get_int(cols[3]), - cols[4], - cols[5], - get_int(cols[6]), - get_int(cols[7]), - get_numeric(cols[8]), - get_yn(cols[9]), - lookup_name, - ) +def init_unicodedata(data): + for row in data: + cols = row.split(';') + codepoint = int(cols[0], 16) + name = cols[1] + if name == 
'<CJK Ideograph, Last>': + lookup_name = 'CJK UNIFIED IDEOGRAPH' + else: + lookup_name = name + data = ( + cols[2], + get_int(cols[3]), + cols[4], + cols[5], + get_int(cols[6]), + get_int(cols[7]), + get_numeric(cols[8]), + get_yn(cols[9]), + lookup_name, + ) - if name.find('First') >= 0: - start = codepoint - elif name.find('Last') >= 0: - _segments.append((start, (start, codepoint), data)) - else: - _names[name] = unichr(codepoint) - _codepoints[codepoint] = data + if name.find('First') >= 0: + start = codepoint + elif name.find('Last') >= 0: + _segments.append((start, (start, codepoint), data)) + else: + _names[name] = unichr(codepoint) + _codepoints[codepoint] = data -def init_east_asian_width(path): - with open(os.path.join(path, 'EastAsianWidth.txt')) as data: - for row in data: - if row.startswith('#'): - continue - row = row.partition('#')[0] - cols = row.split(';') - if len(cols) < 2: - continue - cr = cols[0].split('..') - width = cols[1].rstrip() - if len(cr) == 1: - codepoint = int(cr[0], 16) - _eaw[codepoint] = width - else: - start = int(cr[0], 16) - end = int(cr[1], 16) - _eaw_segments.append((start, (start, end), width)) +def init_east_asian_width(data): + for row in data: + if row.startswith('#'): + continue + row = row.partition('#')[0] + cols = row.split(';') + if len(cols) < 2: + continue + cr = cols[0].split('..') + width = cols[1].rstrip() + if len(cr) == 1: + codepoint = int(cr[0], 16) + _eaw[codepoint] = width + else: + start = int(cr[0], 16) + end = int(cr[1], 16) + _eaw_segments.append((start, (start, end), width)) - -# this doesn't work in general, but it should be ok in this case since -# core libraries don't go through a zip import; see PEP 302 if we -# actually need to do any loader magic -my_path = os.path.dirname(__file__) -init(my_path) -init_east_asian_width(my_path) - # xxx - need to normalize the segments, so # <CJK Ideograph, Last> ==> CJK UNIFIED IDEOGRAPH; # may need to do some sort of analysis against CPython for the 
normalization! @@ -124,7 +113,6 @@ return segment[2] return None -Nonesuch = object() def get_codepoint(unichr, fn=None): if not(isinstance(unichr, unicode)): @@ -225,3 +213,17 @@ except ImportError: pass + + +def init(): + import pkgutil + import os.path + import StringIO + import sys + + my_path = os.path.dirname(__file__) + loader = pkgutil.get_loader('unicodedata') + init_unicodedata(StringIO.StringIO(loader.get_data(os.path.join(my_path,'UnicodeData.txt')))) + init_east_asian_width(StringIO.StringIO(loader.get_data(os.path.join(my_path,'EastAsianWidth.txt')))) + +init() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |