[Htmlparser-cvs] htmlparser/src/org/htmlparser Parser.java,1.93,1.94
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-06-16 02:17:34
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9637

Modified Files:
    Parser.java
Log Message:
Fix bug #973137 Double-bytes characters are messed after parsing.
Add an encoding parameter to the static createParser() method.

Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.93
retrieving revision 1.94
diff -C2 -d -r1.93 -r1.94
*** Parser.java 14 Jun 2004 01:26:51 -0000 1.93
--- Parser.java 16 Jun 2004 02:17:25 -0000 1.94
***************
*** 27,33 ****
--- 27,35 ----
  package org.htmlparser;
  
+ import java.io.ByteArrayInputStream;
  import java.io.File;
  import java.io.IOException;
  import java.io.Serializable;
+ import java.io.UnsupportedEncodingException;
  import java.net.MalformedURLException;
  import java.net.URL;
***************
*** 792,807 ****
      /**
       * Creates the parser on an input string.
!      * @param inputHTML
!      * @return Parser
       */
!     public static Parser createParser(String inputHTML)
      {
!         Lexer lexer;
          Parser ret;
  
!         if (null == inputHTML)
              throw new IllegalArgumentException ("html cannot be null");
!         lexer = new Lexer (new Page (inputHTML));
!         ret = new Parser (lexer);
  
          return (ret);
--- 794,828 ----
      /**
       * Creates the parser on an input string.
!      * Uses the character set encoding to create a stream of bytes that is
!      * fed into the parser as if it had come off the wire.
!      * @param html The string containing HTML.
!      * @param charset Character set encoding to use when converting the
!      * <code>html</code> to a stream of bytes. If charset is <code>null</code>
!      * the default character set is used.
!      * @return A parser with the <code>html</code> string as input.
       */
!     public static Parser createParser (String html, String charset)
      {
!         ByteArrayInputStream stream;
          Parser ret;
  
!         if (null == html)
              throw new IllegalArgumentException ("html cannot be null");
!         if (null == charset)
!             charset = Page.DEFAULT_CHARSET;
!         try
!         {
!             stream = new ByteArrayInputStream (html.getBytes (charset));
!             ret = new Parser (new Lexer (new Page (stream, charset)));
!         }
!         catch (UnsupportedEncodingException uee)
!         {
!             String msg;
! 
!             msg = uee.getMessage ();
!             if (null == msg)
!                 msg = "unsupported encoding (" + charset + ") exception";
!             ret = new Parser (new Lexer (new Page (msg)));
!         }
  
          return (ret);
|