From: <cg...@us...> - 2009-01-05 07:12:46
|
Revision: 5847 http://jython.svn.sourceforge.net/jython/?rev=5847&view=rev Author: cgroves Date: 2009-01-05 07:12:43 +0000 (Mon, 05 Jan 2009) Log Message: ----------- test302 - Move to test_pep263_jy This adds a check for the encoding declared by the source file actually matching up with what we were getting out of it, and switches the default encoding to ascii to follow CPython. CPython assumes utf-8 in some single compilation contexts, and this uses the same parser algorithm everywhere, so I made a small change to test_doctest since Jython is throwing a SyntaxError on utf-8 strings to a single compilation. Modified Paths: -------------- trunk/jython/Lib/test/test_doctest.py trunk/jython/src/org/python/core/ParserFacade.java Added Paths: ----------- trunk/jython/Lib/test/invalid_utf_8_declared_encoding.py trunk/jython/Lib/test/latin1_no_encoding.py trunk/jython/Lib/test/test_pep263_jy.py Removed Paths: ------------- trunk/jython/bugtests/test302.py Added: trunk/jython/Lib/test/invalid_utf_8_declared_encoding.py =================================================================== (Binary files differ) Property changes on: trunk/jython/Lib/test/invalid_utf_8_declared_encoding.py ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: trunk/jython/Lib/test/latin1_no_encoding.py =================================================================== (Binary files differ) Property changes on: trunk/jython/Lib/test/latin1_no_encoding.py ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: trunk/jython/Lib/test/test_doctest.py =================================================================== --- trunk/jython/Lib/test/test_doctest.py 2009-01-05 07:08:51 UTC (rev 5846) +++ trunk/jython/Lib/test/test_doctest.py 2009-01-05 07:12:43 UTC (rev 5847) @@ -2265,17 +2265,18 @@ File "...", line 7, in test_doctest4.txt Failed example: u'...' - Expected: - u'f\xf6\xf6' - Got: - u'f\xc3\xb6\xc3\xb6' + ... ********************************************************************** ... ********************************************************************** + ... + ********************************************************************** + ... + ********************************************************************** 1 items had failures: - 2 of 4 in test_doctest4.txt - ***Test Failed*** 2 failures. - (2, 4) + 4 of 4 in test_doctest4.txt + ***Test Failed*** 4 failures. + (4, 4) >>> doctest.master = None # Reset master. >>> doctest.testfile('test_doctest4.txt', encoding='utf-8') Added: trunk/jython/Lib/test/test_pep263_jy.py =================================================================== --- trunk/jython/Lib/test/test_pep263_jy.py (rev 0) +++ trunk/jython/Lib/test/test_pep263_jy.py 2009-01-05 07:12:43 UTC (rev 5847) @@ -0,0 +1,16 @@ +import unittest +from test import test_support + +class BadEncodingTest(unittest.TestCase): + + def test_invalid_default(self): + self.assertRaises(SyntaxError, __import__, "test.latin1_no_encoding") + + def test_invalid_declared_encoding(self): + self.assertRaises(SyntaxError, __import__, "test.invalid_utf_8_declared_encoding") + +def test_main(): + test_support.run_unittest(BadEncodingTest) + +if __name__=="__main__": + test_main() Deleted: trunk/jython/bugtests/test302.py =================================================================== --- trunk/jython/bugtests/test302.py 2009-01-05 07:08:51 UTC (rev 5846) +++ trunk/jython/bugtests/test302.py 2009-01-05 07:12:43 UTC (rev 5847) @@ -1,20 +0,0 @@ -""" -In bug #439688 the value 0x99 does not survive the JavaCC parser. -""" - -import sys -print sys.defaultencoding -f = open("test302s.py", "wb") -f.write('v = "\x99"\n') -f.close() - -import test302s - -f = open("test302.out", "w") -f.write("\x99") -f.close(); - -from java.io import FileInputStream, InputStreamReader -readval = InputStreamReader(FileInputStream("test302.out"), 'ISO-8859-1').read() - -print ord(test302s.v) == readval Modified: trunk/jython/src/org/python/core/ParserFacade.java =================================================================== --- trunk/jython/src/org/python/core/ParserFacade.java 2009-01-05 07:08:51 UTC (rev 5846) +++ trunk/jython/src/org/python/core/ParserFacade.java 2009-01-05 07:12:43 UTC (rev 5847) @@ -8,7 +8,11 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -import java.io.UnsupportedEncodingException; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.UnsupportedCharsetException; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -56,9 +60,9 @@ } // if reader != null, reset it - public static PyException fixParseError(BufferedReader reader, Throwable t, - String filename) - { + public static PyException fixParseError(ExpectedEncodingBufferedReader reader, + Throwable t, + String filename) { if (reader != null) { try { reader.reset(); @@ -82,6 +86,16 @@ return new PyIndentationError(msg, line, col, text, filename); } return new PySyntaxError(msg, line, col, text, filename); + } else if (t instanceof CharacterCodingException) { + String msg; + if (reader.encoding == null) { + msg = "Non-ASCII character in file '" + filename + "', but no encoding declared" + + "; see http://www.python.org/peps/pep-0263.html for details"; + } else { + msg = "Illegal character in file '" + filename + "' for encoding '" + + reader.encoding + "'"; + } + throw Py.SyntaxError(msg); } else return Py.JavaError(t); } @@ -93,7 +107,7 @@ * from it, to translate ParserExceptions into PySyntaxErrors or * PyIndentationErrors. */ - private static mod parse(BufferedReader reader, + private static mod parse(ExpectedEncodingBufferedReader reader, String kind, String filename, CompilerFlags cflags) throws Throwable { @@ -120,7 +134,7 @@ String kind, String filename, CompilerFlags cflags) { - BufferedReader bufReader = null; + ExpectedEncodingBufferedReader bufReader = null; try { // prepBufReader takes care of encoding detection and universal // newlines: @@ -137,7 +151,7 @@ String kind, String filename, CompilerFlags cflags) { - BufferedReader bufReader = null; + ExpectedEncodingBufferedReader bufReader = null; try { bufReader = prepBufReader(string, cflags, filename); return parse(bufReader, kind, filename, cflags); @@ -154,7 +168,7 @@ CompilerFlags cflags, boolean stdprompt) { // XXX: What's the idea of the stdprompt argument? - BufferedReader reader = null; + ExpectedEncodingBufferedReader reader = null; try { reader = prepBufReader(string, cflags, filename); return parse(reader, kind, filename, cflags); @@ -194,7 +208,21 @@ return true; } - private static BufferedReader prepBufReader(InputStream input, CompilerFlags cflags, + private static class ExpectedEncodingBufferedReader extends BufferedReader { + + /** + * The encoding from the source file, or null if none was specified and ascii is being used. + */ + public final String encoding; + + public ExpectedEncodingBufferedReader(Reader in, String encoding) { + super(in); + this.encoding = encoding; + } + } + + private static ExpectedEncodingBufferedReader prepBufReader(InputStream input, + CompilerFlags cflags, String filename) throws IOException { input = new BufferedInputStream(input); boolean bom = adjustForBOM(input); @@ -222,17 +250,19 @@ UniversalIOWrapper textIO = new UniversalIOWrapper(bufferedIO); input = new TextIOInputStream(textIO); - Reader reader; + CharsetDecoder dec; try { - // Using iso-8859-1 for the raw bytes when no encoding was specified - reader = new InputStreamReader(input, encoding == null ? "iso-8859-1" : encoding); - } catch (UnsupportedEncodingException exc) { + // Use ascii for the raw bytes when no encoding was specified + dec = Charset.forName(encoding == null ? "ascii" : encoding).newDecoder(); + } catch (UnsupportedCharsetException exc) { throw new PySyntaxError("Unknown encoding: " + encoding, 1, 0, "", filename); } - return new BufferedReader(reader); + dec.onMalformedInput(CodingErrorAction.REPORT); + dec.onUnmappableCharacter(CodingErrorAction.REPORT); + return new ExpectedEncodingBufferedReader(new InputStreamReader(input, dec), encoding); } - private static BufferedReader prepBufReader(String string, CompilerFlags cflags, + private static ExpectedEncodingBufferedReader prepBufReader(String string, CompilerFlags cflags, String filename) throws IOException { if (cflags.source_is_utf8) { // Passed unicode, re-encode the String to raw bytes @@ -246,7 +276,7 @@ } /** - * Check for a BOM mark at the begginning of stream. If there is a BOM + * Check for a BOM mark at the beginning of stream. If there is a BOM * mark, advance the stream passed it. If not, reset() to start at the * beginning of the stream again. * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |