Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14744
Modified Files:
Parser.java
Log Message:
Further fix to bug #973137, "Double-bytes characters are messed after parsing".
Created a proper String-based source, with the encoding only optionally specified.
A string is no longer converted to a byte array and then back to characters.
Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.94
retrieving revision 1.95
diff -C2 -d -r1.94 -r1.95
*** Parser.java 16 Jun 2004 02:17:25 -0000 1.94
--- Parser.java 3 Jul 2004 13:56:07 -0000 1.95
***************
*** 27,31 ****
package org.htmlparser;
- import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
--- 27,30 ----
***************
*** 794,802 ****
/**
* Creates the parser on an input string.
- * Uses the character set encoding to create a stream of bytes that is
- * fed into the parser as if it had come off the wire.
* @param html The string containing HTML.
! * @param charset Character set encoding to use when converting the
! * <code>html</code> to a stream of bytes. If charset is <code>null</code>
* the default character set is used.
* @return A parser with the <code>html</code> string as input.
--- 793,799 ----
/**
* Creates the parser on an input string.
* @param html The string containing HTML.
! * @param charset <em>Optional</em>. The character set encoding that will
! * be reported by {@link #getEncoding}. If charset is <code>null</code>
* the default character set is used.
* @return A parser with the <code>html</code> string as input.
***************
*** 804,828 ****
public static Parser createParser (String html, String charset)
{
- ByteArrayInputStream stream;
Parser ret;
if (null == html)
throw new IllegalArgumentException ("html cannot be null");
! if (null == charset)
! charset = Page.DEFAULT_CHARSET;
! try
! {
! stream = new ByteArrayInputStream (html.getBytes (charset));
! ret = new Parser (new Lexer (new Page (stream, charset)));
! }
! catch (UnsupportedEncodingException uee)
! {
! String msg;
!
! msg = uee.getMessage ();
! if (null == msg)
! msg = "unsupported encoding (" + charset + ") exception";
! ret = new Parser (new Lexer (new Page (msg)));
! }
return (ret);
--- 801,809 ----
public static Parser createParser (String html, String charset)
{
Parser ret;
if (null == html)
throw new IllegalArgumentException ("html cannot be null");
! ret = new Parser (new Lexer (new Page (html, charset)));
return (ret);
|