[Htmlparser-cvs] htmlparser/src/org/htmlparser Parser.java,1.94,1.95
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-07-03 13:56:31
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14744

Modified Files:
    Parser.java
Log Message:
Further fix to bug #973137: Double-byte characters are messed up after parsing.
Created a proper String-based source with the encoding only optionally specified.
A string is no longer converted to a byte array and then back to characters.

Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.94
retrieving revision 1.95
diff -C2 -d -r1.94 -r1.95
*** Parser.java 16 Jun 2004 02:17:25 -0000 1.94
--- Parser.java 3 Jul 2004 13:56:07 -0000 1.95
***************
*** 27,31 ****
  package org.htmlparser;
  
- import java.io.ByteArrayInputStream;
  import java.io.File;
  import java.io.IOException;
--- 27,30 ----
***************
*** 794,802 ****
      /**
       * Creates the parser on an input string.
-      * Uses the character set encoding to create a stream of bytes that is
-      * fed into the parser as if it had come off the wire.
       * @param html The string containing HTML.
!      * @param charset Character set encoding to use when converting the
!      * <code>html</code> to a stream of bytes. If charset is <code>null</code>
       * the default character set is used.
       * @return A parser with the <code>html</code> string as input.
--- 793,799 ----
      /**
       * Creates the parser on an input string.
       * @param html The string containing HTML.
!      * @param charset <em>Optional</em>. The character set encoding that will
!      * be reported by {@link #getEncoding}. If charset is <code>null</code>
       * the default character set is used.
       * @return A parser with the <code>html</code> string as input.
***************
*** 804,828 ****
      public static Parser createParser (String html, String charset)
      {
-         ByteArrayInputStream stream;
          Parser ret;
  
          if (null == html)
              throw new IllegalArgumentException ("html cannot be null");
!         if (null == charset)
!             charset = Page.DEFAULT_CHARSET;
!         try
!         {
!             stream = new ByteArrayInputStream (html.getBytes (charset));
!             ret = new Parser (new Lexer (new Page (stream, charset)));
!         }
!         catch (UnsupportedEncodingException uee)
!         {
!             String msg;
! 
!             msg = uee.getMessage ();
!             if (null == msg)
!                 msg = "unsupported encoding (" + charset + ") exception";
!             ret = new Parser (new Lexer (new Page (msg)));
!         }
  
          return (ret);
--- 801,809 ----
      public static Parser createParser (String html, String charset)
      {
          Parser ret;
  
          if (null == html)
              throw new IllegalArgumentException ("html cannot be null");
!         ret = new Parser (new Lexer (new Page (html, charset)));
  
          return (ret);
|