From: <fwi...@us...> - 2008-08-12 17:17:15
|
Revision: 5156 http://jython.svn.sourceforge.net/jython/?rev=5156&view=rev Author: fwierzbicki Date: 2008-08-12 17:17:12 +0000 (Tue, 12 Aug 2008) Log Message: ----------- Implement BOM handling as much as CPython 2.5 does. Altered bad_coding2.py because Java, unlike CPython, accepts "utf8" as a synonym for "utf-8" Modified Paths: -------------- branches/asm/src/org/python/core/ParserFacade.java Added Paths: ----------- branches/asm/Lib/test/bad_coding2.py Added: branches/asm/Lib/test/bad_coding2.py =================================================================== --- branches/asm/Lib/test/bad_coding2.py (rev 0) +++ branches/asm/Lib/test/bad_coding2.py 2008-08-12 17:17:12 UTC (rev 5156) @@ -0,0 +1,5 @@ +#coding: uft8 +print '我' + +#Changed for Jython because the original "coding: utf8" is supposed to be a +#misspelling, but Java accepts this spelling for an encoding. Modified: branches/asm/src/org/python/core/ParserFacade.java =================================================================== --- branches/asm/src/org/python/core/ParserFacade.java 2008-08-11 23:27:27 UTC (rev 5155) +++ branches/asm/src/org/python/core/ParserFacade.java 2008-08-12 17:17:12 UTC (rev 5156) @@ -39,6 +39,8 @@ public class ParserFacade { + private static int MARK_LIMIT = 100000; + private ParserFacade() {} static String getLine(BufferedReader reader, int line) { @@ -192,12 +194,18 @@ private static BufferedReader prepBufreader(InputStream istream, CompilerFlags cflags, String filename) throws IOException { + boolean bom = false; + String encoding = null; InputStream bstream = new BufferedInputStream(istream); - String encoding = readEncoding(bstream); - if(encoding == null && cflags != null && cflags.encoding != null) { - encoding = cflags.encoding; + bom = adjustForBOM(bstream); + encoding = readEncoding(bstream); + if(encoding == null) { + if (bom) { + encoding = "UTF-8"; + } else if (cflags != null && cflags.encoding != null) { + encoding = cflags.encoding; + } } - // Enable
universal newlines mode on the input StreamIO rawIO = new StreamIO(bstream, true); org.python.core.io.BufferedReader bufferedIO = @@ -224,13 +232,40 @@ BufferedReader bufreader = new BufferedReader(reader); - bufreader.mark(100000); + bufreader.mark(MARK_LIMIT); return bufreader; } + /** + * Check for a BOM mark at the beginning of stream. If there is a BOM + * mark, advance the stream past it. If not, reset() to start at the + * beginning of the stream again. + * + * Only checks for EF BB BF right now, since that is all that CPython 2.5 + * checks. + * + * @return true if a BOM was found and skipped. + * @throws ParseException if only part of a BOM is matched. + * + */ + private static boolean adjustForBOM(InputStream stream) throws IOException { + stream.mark(3); + int ch = stream.read(); + if (ch == 0xEF) { + if (stream.read() != 0xBB) { + throw new ParseException("Incomplete BOM at beginning of file"); + } + if (stream.read() != 0xBF) { + throw new ParseException("Incomplete BOM at beginning of file"); + } + return true; + } + stream.reset(); + return false; + } private static String readEncoding(InputStream stream) throws IOException { - stream.mark(100000); + stream.mark(MARK_LIMIT); String encoding = null; BufferedReader br = new BufferedReader(new InputStreamReader(stream), 512); for (int i = 0; i < 2; i++) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |