Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9637
Modified Files:
Parser.java
Log Message:
Fix bug #973137: Double-byte characters are messed up after parsing.
Add an encoding parameter to the static createParser() method.
Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.93
retrieving revision 1.94
diff -C2 -d -r1.93 -r1.94
*** Parser.java 14 Jun 2004 01:26:51 -0000 1.93
--- Parser.java 16 Jun 2004 02:17:25 -0000 1.94
***************
*** 27,33 ****
--- 27,35 ----
package org.htmlparser;
+ import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
+ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
***************
*** 792,807 ****
/**
* Creates the parser on an input string.
! * @param inputHTML
! * @return Parser
*/
! public static Parser createParser(String inputHTML)
{
! Lexer lexer;
Parser ret;
! if (null == inputHTML)
throw new IllegalArgumentException ("html cannot be null");
! lexer = new Lexer (new Page (inputHTML));
! ret = new Parser (lexer);
return (ret);
--- 794,828 ----
/**
* Creates the parser on an input string.
! * Uses the character set encoding to create a stream of bytes that is
! * fed into the parser as if it had come off the wire.
! * @param html The string containing HTML.
! * @param charset Character set encoding to use when converting the
! * <code>html</code> to a stream of bytes. If charset is <code>null</code>
! * the default character set is used.
! * @return A parser with the <code>html</code> string as input.
*/
! public static Parser createParser (String html, String charset)
{
! ByteArrayInputStream stream;
Parser ret;
! if (null == html)
throw new IllegalArgumentException ("html cannot be null");
! if (null == charset)
! charset = Page.DEFAULT_CHARSET;
! try
! {
! stream = new ByteArrayInputStream (html.getBytes (charset));
! ret = new Parser (new Lexer (new Page (stream, charset)));
! }
! catch (UnsupportedEncodingException uee)
! {
! String msg;
!
! msg = uee.getMessage ();
! if (null == msg)
! msg = "unsupported encoding (" + charset + ") exception";
! ret = new Parser (new Lexer (new Page (msg)));
! }
return (ret);
|