From: <cg...@us...> - 2009-01-11 10:35:15
|
Revision: 5918 http://jython.svn.sourceforge.net/jython/?rev=5918&view=rev Author: cgroves Date: 2009-01-11 10:35:10 +0000 (Sun, 11 Jan 2009) Log Message: ----------- In String compilation contexts, we can assume the bytes have already been decoded and just pass them through when parsing instead of using ascii. This gets test_doctest back to passing as it was before and keeps the pep 263 checks. Modified Paths: -------------- trunk/jython/Lib/test/test_doctest.py trunk/jython/src/org/python/core/CompilerFlags.java trunk/jython/src/org/python/core/ParserFacade.java Modified: trunk/jython/Lib/test/test_doctest.py =================================================================== --- trunk/jython/Lib/test/test_doctest.py 2009-01-11 06:24:45 UTC (rev 5917) +++ trunk/jython/Lib/test/test_doctest.py 2009-01-11 10:35:10 UTC (rev 5918) @@ -2265,18 +2265,17 @@ File "...", line 7, in test_doctest4.txt Failed example: u'...' - ... + Expected: + u'f\xf6\xf6' + Got: + u'f\xc3\xb6\xc3\xb6' ********************************************************************** ... ********************************************************************** - ... - ********************************************************************** - ... - ********************************************************************** 1 items had failures: - 4 of 4 in test_doctest4.txt - ***Test Failed*** 4 failures. - (4, 4) + 2 of 4 in test_doctest4.txt + ***Test Failed*** 2 failures. + (2, 4) >>> doctest.master = None # Reset master. >>> doctest.testfile('test_doctest4.txt', encoding='utf-8') Modified: trunk/jython/src/org/python/core/CompilerFlags.java =================================================================== --- trunk/jython/src/org/python/core/CompilerFlags.java 2009-01-11 06:24:45 UTC (rev 5917) +++ trunk/jython/src/org/python/core/CompilerFlags.java 2009-01-11 10:35:10 UTC (rev 5918) @@ -3,54 +3,44 @@ public class CompilerFlags { + private int co_flags; + public boolean nested_scopes = true; public boolean division; public boolean generator_allowed = true; - public boolean with_statement = false; - public boolean absolute_import = false; + public boolean with_statement; + public boolean absolute_import; - public boolean only_ast = false; - public boolean dont_imply_dedent = false; - public boolean source_is_utf8 = false; + public boolean only_ast; + public boolean dont_imply_dedent; + public boolean source_is_utf8; public String encoding; - - public CompilerFlags(){} + public CompilerFlags() {} + public CompilerFlags(int co_flags) { - if ((co_flags & org.python.core.PyTableCode.CO_NESTED) != 0) { - this.nested_scopes = true; - } - if ((co_flags & org.python.core.PyTableCode.CO_FUTUREDIVISION) != 0) { - this.division = true; - } - if ((co_flags & org.python.core.PyTableCode.CO_GENERATOR_ALLOWED) != 0) { - this.generator_allowed = true; - } - if ((co_flags & org.python.core.PyTableCode.CO_FUTURE_ABSOLUTE_IMPORT) != 0) { - this.absolute_import = true; - } - if ((co_flags & org.python.core.PyTableCode.CO_WITH_STATEMENT) != 0) { - this.with_statement = true; - } - if ((co_flags & org.python.core.PyTableCode.PyCF_ONLY_AST) != 0) { - this.only_ast = true; - } - if ((co_flags & org.python.core.PyTableCode.PyCF_DONT_IMPLY_DEDENT) != 0) { - this.dont_imply_dedent = true; - } - if ((co_flags & org.python.core.PyTableCode.PyCF_SOURCE_IS_UTF8) != 0) { - this.source_is_utf8 = true; - } - + this.co_flags = co_flags; + nested_scopes = isEnabled(PyTableCode.CO_NESTED); + division = isEnabled(PyTableCode.CO_FUTUREDIVISION); + generator_allowed = isEnabled(PyTableCode.CO_GENERATOR_ALLOWED); + absolute_import = isEnabled(PyTableCode.CO_FUTURE_ABSOLUTE_IMPORT); + with_statement = isEnabled(PyTableCode.CO_WITH_STATEMENT); + only_ast = isEnabled(PyTableCode.PyCF_ONLY_AST); + dont_imply_dedent = isEnabled(PyTableCode.PyCF_DONT_IMPLY_DEDENT); + source_is_utf8 = isEnabled(PyTableCode.PyCF_SOURCE_IS_UTF8); } + private boolean isEnabled(int codeConstant) { + return (co_flags & codeConstant) != 0; + } + public String toString() { return String.format("CompilerFlags[division=%s nested_scopes=%s generators=%s " + "with_statement=%s absolute_import=%s only_ast=%s " + "dont_imply_dedent=%s source_is_utf8=%s]", division, nested_scopes, - generator_allowed, with_statement, absolute_import, only_ast, + generator_allowed, with_statement, absolute_import, only_ast, dont_imply_dedent, source_is_utf8); } - + } Modified: trunk/jython/src/org/python/core/ParserFacade.java =================================================================== --- trunk/jython/src/org/python/core/ParserFacade.java 2009-01-11 06:24:45 UTC (rev 5917) +++ trunk/jython/src/org/python/core/ParserFacade.java 2009-01-11 10:35:10 UTC (rev 5918) @@ -4,10 +4,13 @@ import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.Reader; +import java.io.Writer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; @@ -138,7 +141,7 @@ try { // prepBufReader takes care of encoding detection and universal // newlines: - bufReader = prepBufReader(stream, cflags, filename); + bufReader = prepBufReader(stream, cflags, filename, false); return parse(bufReader, kind, filename, cflags ); } catch (Throwable t) { throw fixParseError(bufReader, t, filename); @@ -223,7 +226,9 @@ private static ExpectedEncodingBufferedReader prepBufReader(InputStream input, CompilerFlags cflags, - String filename) throws IOException { + String filename, + boolean fromString) + throws IOException { input = new BufferedInputStream(input); boolean bom = adjustForBOM(input); String encoding = readEncoding(input); @@ -250,29 +255,45 @@ UniversalIOWrapper textIO = new UniversalIOWrapper(bufferedIO); input = new TextIOInputStream(textIO); - CharsetDecoder dec; + Charset cs; try { // Use ascii for the raw bytes when no encoding was specified - dec = Charset.forName(encoding == null ? "ascii" : encoding).newDecoder(); + if (encoding == null) { + if (fromString) { + cs = Charset.forName("ISO-8859-1"); + } else { + cs = Charset.forName("ascii"); + } + } else { + cs = Charset.forName(encoding); + } } catch (UnsupportedCharsetException exc) { throw new PySyntaxError("Unknown encoding: " + encoding, 1, 0, "", filename); } + CharsetDecoder dec = cs.newDecoder(); dec.onMalformedInput(CodingErrorAction.REPORT); dec.onUnmappableCharacter(CodingErrorAction.REPORT); return new ExpectedEncodingBufferedReader(new InputStreamReader(input, dec), encoding); } - private static ExpectedEncodingBufferedReader prepBufReader(String string, CompilerFlags cflags, - String filename) throws IOException { + private static ExpectedEncodingBufferedReader prepBufReader(String string, + CompilerFlags cflags, + String filename) throws IOException { + byte[] stringBytes; if (cflags.source_is_utf8) { // Passed unicode, re-encode the String to raw bytes // NOTE: This could be more efficient if we duplicate // prepBufReader/adjustForBOM/readEncoding to work on Readers, instead of // encoding - string = new PyUnicode(string).encode("utf-8"); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Writer w = new OutputStreamWriter(out, "utf-8"); + w.write(string); + w.close(); + stringBytes = out.toByteArray(); + } else { + stringBytes = StringUtil.toBytes(string); } - InputStream input = new ByteArrayInputStream(StringUtil.toBytes(string)); - return prepBufReader(input, cflags, filename); + return prepBufReader(new ByteArrayInputStream(stringBytes), cflags, filename, true); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |