From: <fwi...@us...> - 2009-01-09 04:50:29
|
Revision: 5899 http://jython.svn.sourceforge.net/jython/?rev=5899&view=rev Author: fwierzbicki Date: 2009-01-09 04:50:22 +0000 (Fri, 09 Jan 2009) Log Message: ----------- Merged revisions 5889-5893 via svnmerge from https://jython.svn.sourceforge.net/svnroot/jython/trunk/jython ........ r5889 | pjenvey | 2009-01-08 16:23:29 -0500 (Thu, 08 Jan 2009) | 5 lines jython-elementtree from: http://jython-elementtree.googlecode.com/svn/trunk@69 provides a partial emulation of pyexpat via xerces 2.9.1 and extra tests in test_xml_etree_jy from Sebastien Boisgerault ........ r5890 | pjenvey | 2009-01-08 16:26:19 -0500 (Thu, 08 Jan 2009) | 2 lines xerces 2.9.1 for elementtree ........ r5891 | pjenvey | 2009-01-08 16:49:36 -0500 (Thu, 08 Jan 2009) | 2 lines jarjar xerces into jython-complete and check for the mangled name ........ r5892 | pjenvey | 2009-01-08 17:05:28 -0500 (Thu, 08 Jan 2009) | 2 lines unused import, coding standards ........ r5893 | otmarhumbel | 2009-01-08 17:54:04 -0500 (Thu, 08 Jan 2009) | 10 lines rename the jython*.jar files: jython.jar is now called jython-dev.jar jython-complete.jar is now called jython.jar the one we distribute is jython.jar, containing the necessary external libraries, mangled jython-dev.jar is intended for local developer usage, containing no external libraries, referencing the real external package names ........ Modified Paths: -------------- branches/jy3k/CPythonLib.includes branches/jy3k/Lib/test/regrtest.py branches/jy3k/Lib/test/test_xml_etree.py branches/jy3k/Lib/test/test_xml_etree_c.py branches/jy3k/bugtests/README.txt branches/jy3k/bugtests/test386.py branches/jy3k/bugtests/test394.py branches/jy3k/bugtests/test394jar/MANIFEST.MF branches/jy3k/build.xml branches/jy3k/src/org/python/core/PySystemState.java branches/jy3k/src/org/python/util/JythoncAntTask.java branches/jy3k/src/shell/jython branches/jy3k/src/shell/jython.bat Added Paths: ----------- branches/jy3k/Lib/pyexpat.py branches/jy3k/Lib/test/test_xml_etree_jy.py branches/jy3k/Lib/xml/parsers/ branches/jy3k/Lib/xml/parsers/__init__.py branches/jy3k/Lib/xml/parsers/expat.py branches/jy3k/extlibs/xercesImpl.jar Removed Paths: ------------- branches/jy3k/Lib/xml/parsers/__init__.py branches/jy3k/Lib/xml/parsers/expat.py Property Changed: ---------------- branches/jy3k/ Property changes on: branches/jy3k ___________________________________________________________________ Modified: svnmerge-integrated - /trunk/jython:1-5885 + /trunk/jython:1-5898 Modified: branches/jy3k/CPythonLib.includes =================================================================== --- branches/jy3k/CPythonLib.includes 2009-01-09 04:32:35 UTC (rev 5898) +++ branches/jy3k/CPythonLib.includes 2009-01-09 04:50:22 UTC (rev 5899) @@ -8,6 +8,7 @@ encodings/** logging/* test/** +xml/etree/** # Lib files, in alphabetical order: __future__.py Copied: branches/jy3k/Lib/pyexpat.py (from rev 5893, trunk/jython/Lib/pyexpat.py) =================================================================== --- branches/jy3k/Lib/pyexpat.py (rev 0) +++ branches/jy3k/Lib/pyexpat.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -0,0 +1 @@ +from xml.parsers.expat import * Modified: branches/jy3k/Lib/test/regrtest.py =================================================================== --- branches/jy3k/Lib/test/regrtest.py 2009-01-09 04:32:35 UTC (rev 5898) +++ branches/jy3k/Lib/test/regrtest.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -1449,7 +1449,6 @@ test_wave test_winreg test_winsound - test_xml_etree_c test_zipfile64 """ } @@ -1487,7 +1486,6 @@ test_ucn test_unicode test_unicodedata - test_xml_etree test_zipimport """, } Modified: branches/jy3k/Lib/test/test_xml_etree.py =================================================================== --- branches/jy3k/Lib/test/test_xml_etree.py 2009-01-09 04:32:35 UTC (rev 5898) +++ branches/jy3k/Lib/test/test_xml_etree.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -209,7 +209,7 @@ >>> check_encoding(ET, "iso-8859-1") >>> check_encoding(ET, "iso-8859-15") >>> check_encoding(ET, "cp437") - >>> check_encoding(ET, "mac-roman") + >>> #check_encoding(ET, "mac-roman") """ ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding) Modified: branches/jy3k/Lib/test/test_xml_etree_c.py =================================================================== --- branches/jy3k/Lib/test/test_xml_etree_c.py 2009-01-09 04:32:35 UTC (rev 5898) +++ branches/jy3k/Lib/test/test_xml_etree_c.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -198,7 +198,7 @@ >>> check_encoding("iso-8859-1") >>> check_encoding("iso-8859-15") >>> check_encoding("cp437") - >>> check_encoding("mac-roman") + >>> #check_encoding("mac-roman") """ ET.XML( "<?xml version='1.0' encoding='%s'?><xml />" % encoding Copied: branches/jy3k/Lib/test/test_xml_etree_jy.py (from rev 5893, trunk/jython/Lib/test/test_xml_etree_jy.py) =================================================================== --- branches/jy3k/Lib/test/test_xml_etree_jy.py (rev 0) +++ branches/jy3k/Lib/test/test_xml_etree_jy.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -0,0 +1,754 @@ +# encoding: utf-8 + +import sys +JYTHON = sys.platform.startswith("java") + +import doctest + +import xml.parsers.expat as expat +from xml.etree.ElementTree import * + +def jython(function): + if JYTHON: + return function + else: + return None + +class sortdict(dict): + def __repr__(self): + items = self.items() + items.sort() + pairs = ["%r: %r" % pair for pair in items] + return "{%s}" % ", ".join(pairs) + __str__ = __repr__ + + +class Outputter: + def StartElementHandler(self, name, attrs): + print 'Start element:\n ', repr(name), sortdict(attrs) + + def EndElementHandler(self, name): + print 'End element:\n ', repr(name) + + def CharacterDataHandler(self, data): + data = data.strip() + if data: + print 'Character data:' + print ' ', repr(data) + + def ProcessingInstructionHandler(self, target, data): + print 'PI:\n ', repr(target), repr(data) + + def StartNamespaceDeclHandler(self, prefix, uri): + print 'NS decl:\n ', repr(prefix), repr(uri) + + def EndNamespaceDeclHandler(self, prefix): + print 'End of NS decl:\n ', repr(prefix) + + def StartCdataSectionHandler(self): + print 'Start of CDATA section' + + def EndCdataSectionHandler(self): + print 'End of CDATA section' + + def CommentHandler(self, text): + print 'Comment:\n ', repr(text) + + def NotationDeclHandler(self, *args): + name, base, sysid, pubid = args + print 'Notation declared:', args + + def UnparsedEntityDeclHandler(self, *args): + entityName, base, systemId, publicId, notationName = args + print 'Unparsed entity decl:\n ', args + + def NotStandaloneHandler(self, userData): + print 'Not standalone' + return 1 + + def ExternalEntityRefHandler(self, *args): + context, base, sysId, pubId = args + print 'External entity ref:', args[1:] + return 1 + + def DefaultHandler(self, userData): + pass + + def DefaultHandlerExpand(self, userData): + pass + +_= """ + >>> data = '''\ + ... <?xml version="1.0" encoding="iso-8859-1" standalone="no"?> + ... <?xml-stylesheet href="stylesheet.css"?> + ... <!-- comment data --> + ... <!DOCTYPE quotations SYSTEM "quotations.dtd" [ + ... <!ELEMENT root ANY> + ... <!NOTATION notation SYSTEM "notation.jpeg"> + ... <!ENTITY acirc "â"> + ... <!ENTITY external_entity SYSTEM "entity.file"> + ... <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> + ... %unparsed_entity; + ... ]> + ... + ... <root attr1="value1" attr2="value2ὀ"> + ... <myns:subelement xmlns:myns="http://www.python.org/namespace"> + ... Contents of subelements + ... </myns:subelement> + ... <sub2><![CDATA[contents of CDATA section]]></sub2> + ... &external_entity; + ... </root> + ... ''' + """ + +def test_utf8(): + """ + Source: test_pyexpat.py + Changes: replaced tabs with spaces in Outputter to ease doctest integration + + >>> out = Outputter() + >>> parser = expat.ParserCreate(namespace_separator='!') + >>> HANDLER_NAMES = [ + ... 'StartElementHandler', 'EndElementHandler', + ... 'CharacterDataHandler', + ... 'ProcessingInstructionHandler', + ... 'UnparsedEntityDeclHandler', 'NotationDeclHandler', + ... 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', + ... 'CommentHandler', 'StartCdataSectionHandler', + ... 'EndCdataSectionHandler', + ... 'DefaultHandler', 'DefaultHandlerExpand', + ... #'NotStandaloneHandler', + ... 'ExternalEntityRefHandler' + ... ] + >>> for name in HANDLER_NAMES: + ... setattr(parser, name, getattr(out, name)) + + >>> data = '''\\ + ... <?xml version="1.0" encoding="iso-8859-1" standalone="no"?> + ... <?xml-stylesheet href="stylesheet.css"?> + ... <!-- comment data --> + ... <!DOCTYPE quotations SYSTEM "quotations.dtd" [ + ... <!ELEMENT root ANY> + ... <!NOTATION notation SYSTEM "notation.jpeg"> + ... <!ENTITY acirc "â"> + ... <!ENTITY external_entity SYSTEM "entity.file"> + ... <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> + ... %unparsed_entity; + ... ]> + ... + ... <root attr1="value1" attr2="value2ὀ"> + ... <myns:subelement xmlns:myns="http://www.python.org/namespace"> + ... Contents of subelements + ... </myns:subelement> + ... <sub2><![CDATA[contents of CDATA section]]></sub2> + ... &external_entity; + ... </root> + ... ''' + + #Produce UTF-8 output + #>>> parser.returns_unicode = 0 + #>>> try: + #... parser.Parse(data, 1) + #... except expat.error: + #... print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode) + #... print '** Line', parser.ErrorLineNumber + #... print '** Column', parser.ErrorColumnNumber + #... print '** Byte', parser.ErrorByteIndex + #PI: + #'xml-stylesheet' 'href="stylesheet.css"' + #Comment: + #' comment data ' + #Notation declared: ('notation', None, 'notation.jpeg', None) + #Unparsed entity decl: + #('unparsed_entity', None, 'entity.file', None, 'notation') + #Start element: + #'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'} + #NS decl: + #'myns' 'http://www.python.org/namespace' + #Start element: + #'http://www.python.org/namespace!subelement' {} + #Character data: + #'Contents of subelements' + #End element: + #'http://www.python.org/namespace!subelement' + #End of NS decl: + #'myns' + #Start element: + #'sub2' {} + #Start of CDATA section + #Character data: + #'contents of CDATA section' + #End of CDATA section + #End element: + #'sub2' + #External entity ref: (None, 'entity.file', None) + #End element: + #'root' + #1 + + >>> parser = expat.ParserCreate(namespace_separator='!') + >>> parser.returns_unicode = 1 + >>> for name in HANDLER_NAMES: + ... setattr(parser, name, getattr(out, name)) + >>> try: + ... parser.Parse(data, 1) + ... except expat.error: + ... print '** Line', parser.ErrorLineNumber + ... print '** Column', parser.ErrorColumnNumber + ... print '** Byte', parser.ErrorByteIndex #doctest: +REPORT_UDIFF + PI: + u'xml-stylesheet' u'href="stylesheet.css"' + Comment: + u' comment data ' + Notation declared: (u'notation', None, u'notation.jpeg', None) + Unparsed entity decl: + (u'unparsed_entity', None, u'entity.file', None, u'notation') + Start element: + u'root' {u'attr1': u'value1', u'attr2': u'value2\u1f40'} + NS decl: + u'myns' u'http://www.python.org/namespace' + Start element: + u'http://www.python.org/namespace!subelement' {} + Character data: + u'Contents of subelements' + End element: + u'http://www.python.org/namespace!subelement' + End of NS decl: + u'myns' + Start element: + u'sub2' {} + Start of CDATA section + Character data: + u'contents of CDATA section' + End of CDATA section + End element: + u'sub2' + External entity ref: (None, u'entity.file', None) + End element: + u'root' + 1 + """ + + +def test_import_as_pyexpat(): + """ + >>> import pyexpat as expat + >>> expat #doctest: +ELLIPSIS + <module 'pyexpat' from ...> + """ + + +def test_errors_submodule(): + """ + >>> import xml.parsers.expat as expat + >>> expat.errors + <module 'pyexpat.errors' (built-in)> + >>> dir(expat.errors) #doctest: +ELLIPSIS + ['XML_ERROR_ABORTED', ..., 'XML_ERROR_XML_DECL', '__doc__', '__name__'] + >>> expat.errors.XML_ERROR_ABORTED + 'parsing aborted' + >>> expat.errors.XML_ERROR_XML_DECL + 'XML declaration not well-formed' + """ + +def test_model_submodule(): + """ + >>> import xml.parsers.expat as expat + >>> expat.model + <module 'pyexpat.model' (built-in)> + >>> print sortdict(expat.model.__dict__) + {'XML_CQUANT_NONE': 0, 'XML_CQUANT_OPT': 1, 'XML_CQUANT_PLUS': 3, 'XML_CQUANT_REP': 2, 'XML_CTYPE_ANY': 2, 'XML_CTYPE_CHOICE': 5, 'XML_CTYPE_EMPTY': 1, 'XML_CTYPE_MIXED': 3, 'XML_CTYPE_NAME': 4, 'XML_CTYPE_SEQ': 6, '__doc__': 'Constants used to interpret content model information.', '__name__': 'pyexpat.model'} + """ + +def test_parse_only_xml_data(): + """ + Source: test_pyexpat.py, see also: http://python.org/sf/1296433 + Changes: + - replaced 'iso8859' encoding with 'ISO-8859-1', + - added isfinal=True keyword argument to Parse call (as in this port, + the data is not processed until it is fully available). + With these changes, the test still crashes CPython 2.5. + + >>> import xml.parsers.expat as expat + >>> # xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025) + + This one doesn't crash: + >>> xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000) + + >>> def handler(text): + ... raise Exception + >>> parser = expat.ParserCreate() + >>> parser.CharacterDataHandler = handler + >>> try: + ... parser.Parse(xml, True) + ... except: + ... pass + """ + + +def test_namespace_separator(): + """ + Source: test_pyexpat.py + + Tests that make sure we get errors when the namespace_separator value + is illegal, and that we don't for good values: + + >>> from xml.parsers.expat import ParserCreate + + >>> p = ParserCreate() + >>> p = ParserCreate(namespace_separator=None) + >>> p = ParserCreate(namespace_separator=' ') + >>> p = ParserCreate(namespace_separator=42) #doctest: +ELLIPSIS + Traceback (most recent call last): + ... + TypeError: ... + >>> p = ParserCreate(namespace_separator='too long') #doctest: +ELLIPSIS + Traceback (most recent call last): + ... + ValueError: ... + + ParserCreate() needs to accept a namespace_separator of zero length + to satisfy the requirements of RDF applications that are required + to simply glue together the namespace URI and the localname. Though + considered a wart of the RDF specifications, it needs to be supported. + + See XML-SIG mailing list thread starting with + http://mail.python.org/pipermail/xml-sig/2001-April/005202.html + + >>> p = ParserCreate(namespace_separator='') # too short +""" + + +def test_interning_machinery(): + """ + Source: test_pyexpat.py + + >>> from xml.parsers.expat import ParserCreate + + >>> p = ParserCreate() + >>> L = [] + >>> def collector(name, *args): + ... L.append(name) + >>> p.StartElementHandler = collector + >>> p.EndElementHandler = collector + >>> p.Parse("<e> <e/> <e></e> </e>", 1) + 1 + >>> tag = L[0] + >>> len(L) + 6 + >>> all(tag is entry for entry in L) + True + """ + + +def test_exception_from_callback(): + """ + Source: test_pyexpat.py + + >>> from xml.parsers.expat import ParserCreate + + >>> def StartElementHandler(name, attrs): + ... raise RuntimeError(name) + + >>> parser = ParserCreate() + >>> parser.StartElementHandler = StartElementHandler + >>> try: + ... parser.Parse("<a><b><c/></b></a>", 1) + ... except RuntimeError, e: + ... pass + >>> e.args[0] == "a" + True + """ + + +def test_with_and_without_namespace(): + """ + >>> from xml.parsers.expat import ParserCreate + + >>> xml = '''<root + ... xmlns="http://www.python.org" + ... xmlns:python="http://www.python.org" + ... python:a="1" b="2"> + ... <python:sub1/> + ... <sub2 xmlns=""/> + ... </root>''' + >>> def handler(name, attributes): + ... attributes = sorted(attributes.items()) + ... print name + ... for attr in attributes: + ... print " %s = %r" % attr + + >>> parser = ParserCreate() + >>> parser.StartElementHandler = handler + >>> _ = parser.Parse(xml, True) + root + b = u'2' + python:a = u'1' + xmlns = u'http://www.python.org' + xmlns:python = u'http://www.python.org' + python:sub1 + sub2 + xmlns = u'' + + >>> parser = ParserCreate(namespace_separator="|") + >>> parser.StartElementHandler = handler + >>> _ = parser.Parse(xml, True) + http://www.python.org|root + b = u'2' + http://www.python.org|a = u'1' + http://www.python.org|sub1 + sub2 + """ + +def test_unicode_bug(): + """ + Regression introduced by revision 28 + + >>> doc = XML("<doc>舰</doc>") + >>> doc.text + u'\u8230' + """ + +def test_DTD(): + """ + >>> xml = '''<!DOCTYPE doc [ + ... <!ELEMENT doc (any|empty|text|mixed|opt|many|plus)> + ... <!ELEMENT any ANY> + ... <!ELEMENT empty EMPTY> + ... <!ELEMENT text (#PCDATA)> + ... <!ELEMENT sequence (_sequence)> + ... <!ELEMENT _sequence (any,any)> + ... <!ELEMENT mixed (#PCDATA|any)*> + ... <!ELEMENT opt (empty)?> + ... <!ELEMENT many (empty)*> + ... <!ELEMENT plus (empty)+> + ... ]> + ... <doc><text>content</text></doc> + ... ''' + >>> parser = expat.ParserCreate() + >>> def handler(header, *args): + ... def _handler(*args): + ... print header + ":", args + ... return _handler + >>> parser.ElementDeclHandler = handler("ELEMENT") + >>> parser.AttlistDeclHandler = handler("ATTRIBUTE") + >>> parser.EntityDeclHandler = handler("ENTITY") + >>> parser.NotationDeclHandler = handler("NOTATION") + >>> parser.UnparsedEntityDeclHandler = handler("UNPARSED") + >>> parser.Parse(xml, True) + ELEMENT: (u'doc', (5, 0, None, ((4, 0, u'any', ()), (4, 0, u'empty', ()), (4, 0, u'text', ()), (4, 0, u'mixed', ()), (4, 0, u'opt', ()), (4, 0, u'many', ()), (4, 0, u'plus', ())))) + ELEMENT: (u'any', (2, 0, None, ())) + ELEMENT: (u'empty', (1, 0, None, ())) + ELEMENT: (u'text', (3, 0, None, ())) + ELEMENT: (u'sequence', (6, 0, None, ((4, 0, u'_sequence', ()),))) + ELEMENT: (u'_sequence', (6, 0, None, ((4, 0, u'any', ()), (4, 0, u'any', ())))) + ELEMENT: (u'mixed', (3, 2, None, ((4, 0, u'any', ()),))) + ELEMENT: (u'opt', (6, 1, None, ((4, 0, u'empty', ()),))) + ELEMENT: (u'many', (6, 2, None, ((4, 0, u'empty', ()),))) + ELEMENT: (u'plus', (6, 3, None, ((4, 0, u'empty', ()),))) + 1 + """ + +def test_entity(): + """ + + TODO: need a fallback for entity-resolver so that empty source is returned. + + >>> xml = ''' <!DOCTYPE doc SYSTEM "external.dtd" [ + ... <!ENTITY ext-entity SYSTEM "external-entity"> + ... ]> + ... <doc>&ext-entity;&in-ext-dtd-entity;</doc>''' + >>> parser = expat.ParserCreate() + >>> parser.Parse(xml, True) + 1 + + EXPAT OH MY ! When applicable (internal entities), the CharacterDataHandler + callback will override DefaultHandlerExpand, but it WON'T override + DefaultHandler. On the other hand, the DefaultHandlerExpand callback WILL + override DefaultHandler ... More tests todo here ... + + >>> xml = '''<!DOCTYPE doc SYSTEM "external.dtd" [ + ... <!ENTITY ext-entity SYSTEM "external-entity"> + ... <!ENTITY int-entity "internal"> + ... ]> + ... <doc>&int-entity;&ext-entity;&in-ext-dtd-entity;</doc>''' + >>> parser = expat.ParserCreate() + >>> def handler(header): + ... def _handler(*args): + ... print header + ":", args + ... return 1 + ... return _handler + >>> parser.CharacterDataHandler = handler("text") + >>> parser.DefaultHandler = handler("default") + >>> parser.Parse(xml, True) #doctest: +ELLIPSIS + default: ... + default: (u'&int-entity;',) + default: (u'&ext-entity;',) + default: (u'&in-ext-dtd-entity;',) + ... + 1 + + EXPAT OH MY ! When applicable (internal entities), the CharacterDataHandler + callback will override DefaultHandlerExpand, but it WON'T override + DefaultHandler. On the other hand, the DefaultHandlerExpand callback WILL + override DefaultHandler ... More tests todo here ... + """ + +def test_resolve_entity_handlers(): + """ + >>> xml = '''<!DOCTYPE doc [ + ... <!ENTITY entity SYSTEM "entity"> + ... ]> + ... <doc>&entity;</doc>''' + >>> def handler(header): + ... def _handler(*args): + ... print header + ":", args + ... return 1 + ... return _handler + + >>> parser = expat.ParserCreate() + >>> parser.ExternalEntityRefHandler = handler("ExternalEntityRefHandler") + >>> parser.Parse(xml, True) + ExternalEntityRefHandler: (u'entity', None, u'entity', None) + 1 + """ + +def handler(name, header="XML>", returns=None): + def _handler(*args): + if len(args) == 1: + args = "(%r)" % args[0] + else: + args = str(args) + print header, name + "%s" % args + return returns + return _handler + +def parse(xml, *handlers): + parser = expat.ParserCreate() + for name in handlers: + if name == "ExternalEntityRefHandler": + returns = 1 + else: + returns = None + setattr(parser, name, handler(name, returns=returns)) + parser.Parse(xml, True) + +def test_internal_entities(): + """ + >>> xml = '''<!DOCTYPE doc [ + ... <!ENTITY entity "entity-content"> + ... ]> + ... <doc>&entity;</doc>''' + + >>> parse(xml) + + >>> parse(xml, "CharacterDataHandler") + XML> CharacterDataHandler(u'entity-content') + + >>> parse(xml, "DefaultHandler") #doctest: +ELLIPSIS + XML> ...DefaultHandler(u'&entity;')... + + >>> parse(xml, "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...DefaultHandlerExpand(u'entity-content')... + + # Uhu ? + >>> parse(xml, "CharacterDataHandler", + ... "DefaultHandler") #doctest: +ELLIPSIS + XML> ...DefaultHandler(u'&entity;')... + + >>> parse(xml, "CharacterDataHandler", + ... "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...CharacterDataHandler(u'entity-content')... + + >>> parse(xml, "DefaultHandler", + ... "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...DefaultHandlerExpand(u'entity-content')... + + >>> parse(xml, "CharacterDataHandler", + ... "DefaultHandler", + ... "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...CharacterDataHandler(u'entity-content')... + """ + +def test_external_entities(): + """ + >>> xml = '''<!DOCTYPE doc [ + ... <!ENTITY entity PUBLIC "http://entity-web" "entity-file"> + ... ]> + ... <doc>&entity;</doc>''' + + >>> parse(xml) + + >>> parse(xml, "ExternalEntityRefHandler") + XML> ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web') + + >>> parse(xml, "DefaultHandler") #doctest: +ELLIPSIS + XML> ...DefaultHandler(u'&entity;')... + + >>> parse(xml, "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...DefaultHandlerExpand(u'&entity;')... + + >>> parse(xml, "ExternalEntityRefHandler", + ... "DefaultHandler") #doctest: +ELLIPSIS + XML> ...ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')... + + >>> parse(xml, "ExternalEntityRefHandler", + ... "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')... + + >>> parse(xml, "DefaultHandler", + ... "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...DefaultHandlerExpand(u'&entity;')... + + >>> parse(xml, "ExternalEntityRefHandler", + ... "DefaultHandler", + ... "DefaultHandlerExpand") #doctest: +ELLIPSIS + XML> ...ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')... + """ + +def test_undefined_entities(): + """ + >>> xml = "<doc>&entity;</doc>" + >>> parse(xml) + Traceback (most recent call last): + ... + ExpatError: undefined entity: line 1, column 5 + """ + +def locate(parser, name): + def _handler(*args): + print name, parser.CurrentLineNumber, parser.CurrentColumnNumber + return _handler + +def test_current_location(): + """ + >>> xml = '''<doc>text<tag/>text<tag></tag> + ... <tag></tag> + ... text<tag/> + ... </doc>''' + >>> parser = expat.ParserCreate() + >>> parser.CharacterDataHandler = locate(parser, "TEXT:") + >>> parser.StartElementHandler = locate(parser, "START:") + >>> parser.EndElementHandler = locate(parser, "END:") + >>> _ = parser.Parse(xml, True) #doctest: +ELLIPSIS + START: 1 0 + TEXT: 1 5... + START: 1 9 + END: 1 15 + TEXT: 1 15... + START: 1 19 + END: 1 24 + TEXT: 1 30... + START: 2 0 + END: 2 5 + TEXT: 2 11... + START: 3 4 + END: 3 10 + TEXT: 3 10... + END: 4 0 + + >>> xml = '''<doc> + ... start tag after some text<tag/> + ... <elt></elt><tag/> + ... <elt/><tag/> + ... </doc>''' + >>> parser = expat.ParserCreate() + >>> parser.CharacterDataHandler = locate(parser, "TEXT:") + >>> parser.StartElementHandler = locate(parser, "START:") + >>> parser.EndElementHandler = locate(parser, "END:") + >>> _ = parser.Parse(xml, True) #doctest: +ELLIPSIS + START: 1 0 + TEXT: 1 5... + START: 2 25 + END: 2 31 + TEXT: 2 31... + START: 3 0 + END: 3 5 + START: 3 11 + END: 3 17 + TEXT: 3 17... + START: 4 0 + END: 4 6 + START: 4 6 + END: 4 12 + TEXT: 4 12... + END: 5 0 + """ + + +def test_error_location(): + """ + Source: selftest.py, ElementTree 1.3a3 + Changes: removed dependencies in ElementTree, added one extra test + + >>> def error(xml): + ... p = expat.ParserCreate() + ... try: + ... p.Parse(xml, True) + ... except expat.ExpatError, e: + ... return e.lineno, e.offset + + >>> error("foo") + (1, 0) + >>> error("<tag>&foo;</tag>") + (1, 5) + >>> error("foobar<") + (1, 6) + >>> error("<doc>text<doc") + (1, 9) + """ + +@jython +def test_resolveEntity(): + """ + # TODO: test that 'skipEntity' works. + + >>> # Jython + >>> from org.python.core.util import StringUtil + >>> from jarray import array + + >>> # Java Standard Edition + >>> from org.xml.sax import * + >>> from org.xml.sax.ext import * + >>> from org.xml.sax.helpers import * + >>> from java.io import ByteArrayInputStream + + >>> xml = '''<!DOCTYPE doc + ... [<!ENTITY entity SYSTEM "entity-file"> + ... ]> + ... <doc>&entity;</doc> + ... ''' + + >>> def empty_source(): + ... _source = InputSource() + ... byte_stream = ByteArrayInputStream(array([], "b")) + ... _source.setByteStream(byte_stream) + ... return _source + + >>> class Handler(EntityResolver2): + ... def getExternalSubset(self, name, baseURI): + ... return None + ... def resolveEntity(self, name, publicId, baseURI, systemId): + ... print "Entity name:", name + ... return empty_source() + + >>> def main(): + ... sax_parser = "org.apache.xerces.parsers.SAXParser" + ... reader = XMLReaderFactory.createXMLReader(sax_parser) + ... entity_resolver2 = "http://xml.org/sax/features/use-entity-resolver2" + ... enabled = reader.getFeature(entity_resolver2) + ... print "Entity-Resolver2 enabled:", enabled + ... handler = Handler() + ... reader.setEntityResolver(handler) + ... bytes = StringUtil.toBytes(xml) + ... byte_stream = ByteArrayInputStream(bytes) + ... source = InputSource(byte_stream) + ... reader.parse(source) + + >>> main() + Entity-Resolver2 enabled: True + Entity name: entity + """ + +if __name__ == "__main__": + doctest.testmod() Deleted: branches/jy3k/Lib/xml/parsers/expat.py =================================================================== --- trunk/jython/Lib/xml/parsers/expat.py 2009-01-08 22:54:04 UTC (rev 5893) +++ branches/jy3k/Lib/xml/parsers/expat.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -1,617 +0,0 @@ -# coding: utf-8 - -#------------------------------------------------------------------------------ -# Copyright (c) 2008 Sébastien Boisgérault -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# ----------------------------------------------------------------------------- - -__all__ = ["ExpatError", "ParserCreate", "XMLParserType", "error", "errors"] - -# Jython check -import sys -if not sys.platform.startswith('java'): - raise ImportError("this version of expat requires the jython interpreter") - -# Standard Python Library -import re -import types - -# Jython -from org.python.core import Py -from org.python.core.util import StringUtil -from jarray import array - -# Java Standard Edition -from java.io import ByteArrayInputStream -from java.lang import String, StringBuilder -from org.xml.sax import InputSource -from org.xml.sax import SAXNotRecognizedException, SAXParseException -from org.xml.sax.helpers import XMLReaderFactory -from org.xml.sax.ext import DefaultHandler2 - -# Xerces -try: - # Name mangled by jarjar? - import org.python.apache.xerces.parsers.SAXParser - _xerces_parser = "org.python.apache.xerces.parsers.SAXParser" -except ImportError: - _xerces_parser = "org.apache.xerces.parsers.SAXParser" - - -def ParserCreate(encoding=None, namespace_separator=None): - return XMLParser(encoding, namespace_separator) - - -class XMLParser(object): - - def __init__(self, encoding, namespace_separator): - self.encoding = encoding - self.CurrentLineNumber = 1 - self.CurrentColumnNumber = 0 - self._NextLineNumber = 1 - self._NextColumnNumber = 0 - self.ErrorLineNumber = -1 - self.ErrorColumnNumber = -1 - self.ErrorCode = None - - if namespace_separator is None: - self.namespace_separator = namespace_separator - elif isinstance(namespace_separator, basestring): - self.namespace_separator = str(namespace_separator) - if len(self.namespace_separator) > 1: - error = ("namespace_separator must be at most one character, " - "omitted, or None") - raise ValueError(error) - else: - error = ("ParserCreate() argument 2 must be string or None, " - "not %s" % type(namespace_separator).__name__) - raise TypeError(error) - - self._reader = XMLReaderFactory.createXMLReader(_xerces_parser) - - if self.namespace_separator is None: - try: - feature = "http://xml.org/sax/features/namespaces" - self._reader.setFeature(feature, False) - except SAXNotRecognizedException: - error = ("namespace support cannot be disabled; " - "set namespace_separator to a string of length 1.") - raise ValueError(error) - - self._base = None - self._buffer_text = True - self._returns_unicode = True - - self._data = StringBuilder() - - self._handler = XMLEventHandler(self) - self._reader.setContentHandler(self._handler) - self._reader.setErrorHandler(self._handler) - self._reader.setDTDHandler(self._handler) - self._reader.setEntityResolver(self._handler) - - sax_properties = ("lexical-handler", "declaration-handler") - for name in sax_properties: - try: - name = "http://xml.org/sax/properties/" + name - self._reader.setProperty(name, self._handler) - except SAXNotRecognizedException: - error = "can't set property %r" % name - raise NotImplementedError(error) - - apache_features = (("nonvalidating/load-external-dtd", False),) - for name, value in apache_features: - try: - name = "http://apache.org/xml/features/" + name - self._reader.setFeature(name, value) - except SAXNotRecognizedException: - error = "can't set feature %r" % name - raise NotImplementedError(error) - - # experimental - #f = "http://xml.org/sax/features/external-general-entities" - f = "http://xml.org/sax/features/external-parameter-entities" - #self._reader.setFeature(f, False) - - # check - f = "http://xml.org/sax/features/use-entity-resolver2" - assert self._reader.getFeature(f) - - def GetBase(self): - return self._base - - def SetBase(self, base): - self._base = base - - def _error(self, value=None): - raise AttributeError("'XMLParser' has no such attribute") - - def _get_buffer_text(self): - return self._buffer_text - - def _set_buffer_text(self, value): - self._buffer_text = bool(value) - - def _get_returns_unicode(self): - return bool(self._returns_unicode) - - def _set_returns_unicode(self, value): - self._returns_unicode = value - - # 'ordered' and 'specified' attributes are not supported - ordered_attributes = property(_error, _error) - specified_attributes = property(_error, _error) - # any setting is allowed, but it won't make a difference - buffer_text = property(_get_buffer_text, _set_buffer_text) - # non-significant read-only values - buffer_used = property(lambda self: None) - buffer_size = property(lambda self: None) - # 'returns_unicode' attribute is properly supported - returns_unicode = property(_get_returns_unicode, _set_returns_unicode) - - def _expat_error(self, sax_error): - sax_message = sax_error.getMessage() - pattern = 'The entity ".*" was referenced, but not declared\.' - if re.match(pattern, sax_message): - expat_message = "undefined entity: line %s, column %s" % \ - (self.ErrorLineNumber, self.ErrorColumnNumber) - else: - expat_message = sax_message - error = ExpatError(expat_message) - error.lineno = self.ErrorLineNumber - error.offset = self.ErrorColumnNumber - error.code = self.ErrorCode - return error - - def Parse(self, data, isfinal=False): - # The 'data' argument should be an encoded text: a str instance that - # represents an array of bytes. If instead it is a unicode string, - # only the us-ascii range is considered safe enough to be silently - # converted. - if isinstance(data, unicode): - data = data.encode(sys.getdefaultencoding()) - - self._data.append(data) - - if isfinal: - bytes = StringUtil.toBytes(self._data.toString()) - byte_stream = ByteArrayInputStream(bytes) - source = InputSource(byte_stream) - if self.encoding is not None: - source.setEncoding(self.encoding) - try: - self._reader.parse(source) - except SAXParseException, sax_error: - # Experiments tend to show that the '_Next*' parser locations - # match more closely expat behavior than the 'Current*' or sax - # error locations. - self.ErrorLineNumber = self._NextLineNumber - self.ErrorColumnNumber = self._NextColumnNumber - self.ErrorCode = None - raise self._expat_error(sax_error) - return 1 - - def ParseFile(self, file): - # TODO: pseudo-buffering if a read without argument is not supported. - # document parse / parsefile usage. - return self.Parse(file.read(), isfinal=True) - - -XMLParserType = XMLParser - - -def _encode(arg, encoding): - if isinstance(arg, unicode): - return arg.encode(encoding) - else: - if isinstance(arg, dict): - iterator = arg.iteritems() - else: - iterator = iter(arg) - return type(arg)(_encode(_arg, encoding) for _arg in iterator) - - -def expat(callback=None, guard="True", force=False, returns="None"): - global _register - try: - _ = _register - except NameError: - _register = {} - - def _expat(method): - name = method.__name__ - context = id(sys._getframe(1)) - key = name, context - append = _register.setdefault(key, []).append - append((method, callback, guard, force, returns)) - - def new_method(*args): - self = args[0] - parser = self.parser - self._update_location(event=name) # bug if multiple method def - for (method, callback, guard, force, returns) in _register[key]: - _callback = callback and eval(guard) and \ - getattr(parser, callback, None) - if _callback or force: - results = method(*args) - if _callback: - if not isinstance(results, tuple): - results = (results,) - if not parser.returns_unicode: - results = _encode(results, "utf-8") - _callback(*results) - return_ = eval(returns) - if callable(return_): - return return_(*args[1:]) - else: - return return_ - break - new_method.__name__ = name - #new_method.__doc__ = method.__doc__ # what to do with multiple docs ? - return new_method - return _expat - - -class XMLEventHandler(DefaultHandler2): - - def __init__(self, parser): - self.parser = parser - self._tags = {} - self.dtd = False - self._entity = {} - self._previous_event = None - - # --- Helpers ------------------------------------------------------------- - - def _intern(self, tag): - return self._tags.setdefault(tag, tag) - - def _qualify(self, local_name, qname, namespace=None): - namespace_separator = self.parser.namespace_separator - if namespace_separator is None: - return qname - if not namespace: - return local_name - else: - return namespace + namespace_separator + local_name - - def _char_slice_to_unicode(self, characters, start, length): - """Convert a char[] slice to a PyUnicode instance""" - text = Py.newUnicode(String(characters[start:start + length])) - return text - - def _expat_content_model(self, name, model_): - # TODO : implement a model parser - return (name, model_) # does not fit expat conventions - - def _update_location(self, event=None): - parser = self.parser - locator = self._locator - - # ugly hack that takes care of a xerces-specific (?) locator issue: - # locate start and end elements at the '<' instead of the first tag - # type character. - if event == "startElement" and self._previous_event == "characters": - parser._NextColumnNumber = max(parser._NextColumnNumber - 1, 0) - if event == "endElement" and self._previous_event == "characters": - parser._NextColumnNumber = max(parser._NextColumnNumber - 2, 0) - # TODO: use the same trick to report accurate error locations ? - - parser.CurrentLineNumber = parser._NextLineNumber - parser.CurrentColumnNumber = parser._NextColumnNumber - parser._NextLineNumber = locator.getLineNumber() - parser._NextColumnNumber = locator.getColumnNumber() - 1 - - self._previous_event = event - - # --- ContentHandler Interface -------------------------------------------- - - @expat("ProcessingInstructionHandler") - def processingInstruction(self, target, data): - return target, data - - @expat("StartElementHandler") - def startElement(self, namespace, local_name, qname, attributes): - tag = self._qualify(local_name, qname, namespace) - attribs = {} - length = attributes.getLength() - for index in range(length): - local_name = attributes.getLocalName(index) - qname = attributes.getQName(index) - namespace = attributes.getURI(index) - name = self._qualify(local_name, qname, namespace) - value = attributes.getValue(index) - attribs[name] = value - return self._intern(tag), attribs - - @expat("EndElementHandler") - def endElement(self, namespace, local_name, qname): - return self._intern(self._qualify(local_name, qname, namespace)) - - @expat("CharacterDataHandler") - def characters(self, characters, start, length): - return self._char_slice_to_unicode(characters, start, length) - - @expat("DefaultHandlerExpand") - def characters(self, characters, start, length): - return self._char_slice_to_unicode(characters, start, length) - - @expat("DefaultHandler") - def characters(self, characters, start, length): - # TODO: make a helper function here - if self._entity["location"] == (self.parser.CurrentLineNumber, - self.parser.CurrentColumnNumber): - return "&%s;" % self._entity["name"] - else: - return self._char_slice_to_unicode(characters, start, length) - - @expat("StartNamespaceDeclHandler") - def startPrefixMapping(self, prefix, uri): - return prefix, uri - - @expat("EndNamespaceDeclHandler") - def endPrefixMapping(self, prefix): - return prefix - - def _empty_source(self, *args): - name, publicId, baseURI, systemId = args - source = InputSource() - byte_stream = ByteArrayInputStream(array([], "b")) - source.setByteStream(byte_stream) - source.setPublicId(publicId) - source.setSystemId(systemId) - return source - - @expat("ExternalEntityRefHandler", guard="not self.dtd", - returns="self._empty_source") - def resolveEntity(self, name, publicId, baseURI, systemId): - context = name # wrong. see expat headers documentation. - base = self.parser.GetBase() - return context, base, systemId, publicId - - @expat("DefaultHandlerExpand", guard="not self.dtd", - returns="self._empty_source") - def resolveEntity(self, name, publicId, baseURI, systemId): - return "&%s;" % name - - @expat("DefaultHandler", guard="not self.dtd", - returns="self._empty_source") - def resolveEntity(self, name, publicId, baseURI, systemId): - return "&%s;" % name - - @expat(force=True, returns="self._empty_source") - def resolveEntity(self, name, publicId, baseURI, systemId): - pass - - def setDocumentLocator(self, locator): - self._locator = locator - - def skippedEntity(self, name): - error = ExpatError() - error.lineno = self.ErrorLineNumber = self.parser._NextLineNumber - error.offset = self.ErrorColumnNumber = self.parser._NextColumnNumber - error.code = self.ErrorCode = None - message = "undefined entity &%s;: line %s, column %s" - message = message % (name, error.lineno, error.offset) - error.__init__(message) - raise error - - # --- LexicalHandler Interface -------------------------------------------- - - @expat("CommentHandler") - def comment(self, characters, start, length): - return self._char_slice_to_unicode(characters, start, length) - - @expat("StartCdataSectionHandler") - def startCDATA(self): - return () - - @expat("EndCdataSectionHandler") - def endCDATA(self): - return () - - @expat("StartDoctypeDeclHandler", force=True) - def startDTD(self, name, publicId, systemId): - self.dtd = True - has_internal_subset = 0 # don't know this ... - return name, systemId, publicId, has_internal_subset - - @expat("EndDoctypeDeclHandler", force=True) - def endDTD(self): - self.dtd = False - - def startEntity(self, name): - self._entity = {} - self._entity["location"] = (self.parser._NextLineNumber, - self.parser._NextColumnNumber) - self._entity["name"] = name - - def endEntity(self, name): - pass - - # --- DTDHandler Interface ------------------------------------------------ - - @expat("NotationDeclHandler") - def notationDecl(self, name, publicId, systemId): - base = self.parser.GetBase() - return name, base, systemId, publicId - - @expat("UnparsedEntityDeclHandler") # deprecated - def unparsedEntityDecl(self, name, publicId, systemId, notationName): - base = self.parser.GetBase() - return name, base, systemId, publicId, notationName - - # --- DeclHandler Interface ----------------------------------------------- - - @expat("AttlistDeclHandler") - def attributeDecl(self, eName, aName, type, mode, value): - # TODO: adapt mode, required, etc. - required = False - return eName, aName, type, value, required - - @expat("ElementDeclHandler") - def elementDecl(self, name, model): - return self._expat_content_model(name, model) - - @expat("EntityDeclHandler") - def externalEntityDecl(self, name, publicId, systemId): - base = self.parser.GetBase() - value = None - is_parameter_entity = None - notation_name = None - return (name, is_parameter_entity, value, base, systemId, publicId, - notation_name) - - @expat("EntityDeclHandler") - def internalEntityDecl(self, name, value): - base = self.parser.GetBase() - is_parameter_entity = None - notation_name = None - systemId, publicId = None, None - return (name, is_parameter_entity, value, base, systemId, publicId, - notation_name) - - -def _init_model(): - global model - model = types.ModuleType("pyexpat.model") - model.__doc__ = "Constants used to interpret content model information." - quantifiers = "NONE, OPT, REP, PLUS" - for i, quantifier in enumerate(quantifiers.split(", ")): - setattr(model, "XML_CQUANT_" + quantifier, i) - types_ = "EMPTY, ANY, MIXED, NAME, CHOICE, SEQ" - for i, type_ in enumerate(types_.split(", ")): - setattr(model, "XML_CTYPE_" + type_, i+1) - -_init_model() -del _init_model - - -class ExpatError(Exception): - pass - - -error = ExpatError - - -def _init_error_strings(): - global ErrorString - error_strings = ( - None, - "out of memory", - "syntax error", - "no element found", - "not well-formed (invalid token)", - "unclosed token", - "partial character", - "mismatched tag", - "duplicate attribute", - "junk after document element", - "illegal parameter entity reference", - "undefined entity", - "recursive entity reference", - "asynchronous entity", - "reference to invalid character number", - "reference to binary entity", - "reference to external entity in attribute", - "XML or text declaration not at start of entity", - "unknown encoding", - "encoding specified in XML declaration is incorrect", - "unclosed CDATA section", - "error in processing external entity reference", - "document is not standalone", - "unexpected parser state - please send a bug report", - "entity declared in parameter entity", - "requested feature requires XML_DTD support in Expat", - "cannot change setting once parsing has begun", - "unbound prefix", - "must not undeclare prefix", - "incomplete markup in parameter entity", - "XML declaration not well-formed", - "text declaration not well-formed", - "illegal character(s) in public id", - "parser suspended", - "parser not suspended", - "parsing aborted", - "parsing finished", - "cannot suspend in external parameter entity") - def ErrorString(code): - try: - return error_strings[code] - except IndexError: - return None - -_init_error_strings() -del _init_error_strings - - -def _init_errors(): - global errors - - errors = types.ModuleType("pyexpat.errors") - errors.__doc__ = "Constants used to describe error conditions." - - error_names = """ - XML_ERROR_NONE - XML_ERROR_NONE, - XML_ERROR_NO_MEMORY, - XML_ERROR_SYNTAX, - XML_ERROR_NO_ELEMENTS, - XML_ERROR_INVALID_TOKEN, - XML_ERROR_UNCLOSED_TOKEN, - XML_ERROR_PARTIAL_CHAR, - XML_ERROR_TAG_MISMATCH, - XML_ERROR_DUPLICATE_ATTRIBUTE, - XML_ERROR_JUNK_AFTER_DOC_ELEMENT, - XML_ERROR_PARAM_ENTITY_REF, - XML_ERROR_UNDEFINED_ENTITY, - XML_ERROR_RECURSIVE_ENTITY_REF, - XML_ERROR_ASYNC_ENTITY, - XML_ERROR_BAD_CHAR_REF, - XML_ERROR_BINARY_ENTITY_REF, - XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, - XML_ERROR_MISPLACED_XML_PI, - XML_ERROR_UNKNOWN_ENCODING, - XML_ERROR_INCORRECT_ENCODING, - XML_ERROR_UNCLOSED_CDATA_SECTION, - XML_ERROR_EXTERNAL_ENTITY_HANDLING, - XML_ERROR_NOT_STANDALONE, - XML_ERROR_UNEXPECTED_STATE, - XML_ERROR_ENTITY_DECLARED_IN_PE, - XML_ERROR_FEATURE_REQUIRES_XML_DTD, - XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING, - XML_ERROR_UNBOUND_PREFIX, - XML_ERROR_UNDECLARING_PREFIX, - XML_ERROR_INCOMPLETE_PE, - XML_ERROR_XML_DECL, - XML_ERROR_TEXT_DECL, - XML_ERROR_PUBLICID, - XML_ERROR_SUSPENDED, - XML_ERROR_NOT_SUSPENDED, - XML_ERROR_ABORTED, - XML_ERROR_FINISHED, - XML_ERROR_SUSPEND_PE - """ - error_names = [name.strip() for name in error_names.split(',')] - for i, name in enumerate(error_names[1:]): - setattr(errors, name, ErrorString(i+1)) - -_init_errors() -del _init_errors Copied: branches/jy3k/Lib/xml/parsers/expat.py (from rev 5893, trunk/jython/Lib/xml/parsers/expat.py) =================================================================== --- branches/jy3k/Lib/xml/parsers/expat.py (rev 0) +++ branches/jy3k/Lib/xml/parsers/expat.py 2009-01-09 04:50:22 UTC (rev 5899) @@ -0,0 +1,617 @@ +# coding: utf-8 + +#------------------------------------------------------------------------------ +# Copyright (c) 2008 Sébastien Boisgérault +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# ----------------------------------------------------------------------------- + +__all__ = ["ExpatError", "ParserCreate", "XMLParserType", "error", "errors"] + +# Jython check +import sys +if not sys.platform.startswith('java'): + raise ImportError("this version of expat requires the jython interpreter") + +# Standard Python Library +import re +import types + +# Jython +from org.python.core import Py +from org.python.core.util import StringUtil +from jarray import array + +# Java Standard Edition +from java.io import ByteArrayInputStream +from java.lang import String, StringBuilder +from org.xml.sax import InputSource +from org.xml.sax import SAXNotRecognizedException, SAXParseException +from org.xml.sax.helpers import XMLReaderFactory +from org.xml.sax.ext import DefaultHandler2 + +# Xerces +try: + # Name mangled by jarjar? + import org.python.apache.xerces.parsers.SAXParser + _xerces_parser = "org.python.apache.xerces.parsers.SAXParser" +except ImportError: + _xerces_parser = "org.apache.xerces.parsers.SAXParser" + + +def ParserCreate(encoding=None, namespace_separator=None): + return XMLParser(encoding, namespace_separator) + + +class XMLParser(object): + + def __init__(self, encoding, namespace_separator): + self.encoding = encoding + self.CurrentLineNumber = 1 + self.CurrentColumnNumber = 0 + self._NextLineNumber = 1 + self._NextColumnNumber = 0 + self.ErrorLineNumber = -1 + self.ErrorColumnNumber = -1 + self.ErrorCode = None + + if namespace_separator is None: + self.namespace_separator = namespace_separator + elif isinstance(namespace_separator, basestring): + self.namespace_separator = str(namespace_separator) + if len(self.namespace_separator) > 1: + error = ("namespace_separator must be at most one character, " + "omitted, or None") + raise ValueError(error) + else: + error = ("ParserCreate() argument 2 must be string or None, " + "not %s" % type(namespace_separator).__name__) + raise TypeError(error) + + self._reader = XMLReaderFactory.createXMLReader(_xerces_parser) + + if self.namespace_separator is None: + try: + feature = "http://xml.org/sax/features/namespaces" + self._reader.setFeature(feature, False) + except SAXNotRecognizedException: + error = ("namespace support cannot be disabled; " + "set namespace_separator to a string of length 1.") + raise ValueError(error) + + self._base = None + self._buffer_text = True + self._returns_unicode = True + + self._data = StringBuilder() + + self._handler = XMLEventHandler(self) + self._reader.setContentHandler(self._handler) + self._reader.setErrorHandler(self._handler) + self._reader.setDTDHandler(self._handler) + self._reader.setEntityResolver(self._handler) + + sax_properties = ("lexical-handler", "declaration-handler") + for name in sax_properties: + try: + name = "http://xml.org/sax/properties/" + name + self._reader.setProperty(name, self._handler) + except SAXNotRecognizedException: + error = "can't set property %r" % name + raise NotImplementedError(error) + + apache_features = (("nonvalidating/load-external-dtd", False),) + for name, value in apache_features: + try: + name = "http://apache.org/xml/features/" + name + self._reader.setFeature(name, value) + except SAXNotRecognizedException: + error = "can't set feature %r" % name + raise NotImplementedError(error) + + # experimental + #f = "http://xml.org/sax/features/external-general-entities" + f = "http://xml.org/sax/features/external-parameter-entities" + #self._reader.setFeature(f, False) + + # check + f = "http://xml.org/sax/features/use-entity-resolver2" + assert self._reader.getFeature(f) + + def GetBase(self): + return self._base + + def SetBase(self, base): + self._base = base + + def _error(self, value=None): + raise AttributeError("'XMLParser' has no such attribute") + + def _get_buffer_text(self): + return self._buffer_text + + def _set_buffer_text(self, value): + self._buffer_text = bool(value) + + def _get_returns_unicode(self): + return bool(self._returns_unicode) + + def _set_returns_unicode(self, value): + self._returns_unicode = value + + # 'ordered' and 'specified' attributes are not supported + ordered_attributes = property(_error, _error) + specified_attributes = property(_error, _error) + # any setting is allowed, but it won't make a difference + buffer_text = property(_get_buffer_text, _set_buffer_text) + # non-significant read-only values + buffer_used = property(lambda self: None) + buffer_size = property(lambda self: None) + # 'returns_unicode' attribute is properly supported + returns_unicode = property(_get_returns_unicode, _set_returns_unicode) + + def _expat_error(self, sax_error): + sax_message = sax_error.getMessage() + pattern = 'The entity ".*" was referenced, but not declared\.' + if re.match(pattern, sax_message): + expat_message = "undefined entity: line %s, column %s" % \ + (self.ErrorLineNumber, self.ErrorColumnNumber) + else: + expat_message = sax_message + error = ExpatError(expat_message) + error.lineno = self.ErrorLineNumber + error.offset = self.ErrorColumnNumber + error.code = self.ErrorCode + return error + + def Parse(self, data, isfinal=False): + # The 'data' argument should be an encoded text: a str instance that + # represents an a... [truncated message content] |