Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14744/util
Modified Files:
ParserUtils.java
Log Message:
Further fix to bug #973137, "Double-bytes characters are messed after parsing".
Created a proper String-based source with the encoding only optionally specified.
A string is no longer converted to a byte array and then back to characters.
Index: ParserUtils.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** ParserUtils.java 2 Jul 2004 00:49:32 -0000 1.42
--- ParserUtils.java 3 Jul 2004 13:56:09 -0000 1.43
***************
*** 27,31 ****
package org.htmlparser.util;
- import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
--- 27,30 ----
***************
*** 1098,1103 ****
Parser parser = new Parser();
Lexer lexer = new Lexer();
! String defCharSet = new Page().DEFAULT_CHARSET;
! Page page = new Page(new ByteArrayInputStream(input.getBytes(defCharSet)), defCharSet);
lexer.setPage(page);
parser.setLexer(lexer);
--- 1097,1101 ----
Parser parser = new Parser();
Lexer lexer = new Lexer();
! Page page = new Page(input);
lexer.setPage(page);
parser.setLexer(lexer);
|