Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1:/tmp/cvs-serv3574/lexer
Modified Files:
Page.java
Log Message:
Fix bug #874175 StringBean doesn't handle charset change well
Add EncodingChangeException to distinguish a recoverable character set change
occurring after the lexer has already coughed up some characters using the wrong
encoding. Added testEncodingChange in LexerTests to exercise it.
Changed IteratorImpl to not wrap a ParserException with another ParserException.
Changed StringBean to retry the URL when an encoding change exception is caught.
Index: Page.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** Page.java 2 Jan 2004 16:24:53 -0000 1.31
--- Page.java 10 Jan 2004 15:23:33 -0000 1.32
***************
*** 27,38 ****
package org.htmlparser.lexer;
! import java.io.*;
import java.io.IOException;
import java.io.Serializable;
! import java.lang.reflect.*;
! import java.net.*;
! import org.htmlparser.util.*;
import org.htmlparser.util.LinkProcessor;
/**
--- 27,46 ----
package org.htmlparser.lexer;
! import java.io.ByteArrayInputStream;
! import java.io.InputStream;
import java.io.IOException;
+ import java.io.ObjectInputStream;
+ import java.io.ObjectOutputStream;
import java.io.Serializable;
! import java.io.UnsupportedEncodingException;
! import java.lang.reflect.InvocationTargetException;
! import java.lang.reflect.Method;
! import java.net.URL;
! import java.net.URLConnection;
! import java.net.UnknownHostException;
! import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.LinkProcessor;
+ import org.htmlparser.util.ParserException;
/**
***************
*** 692,696 ****
for (int i = 0; i < offset; i++)
if (new_chars[i] != buffer[i])
! throw new ParserException ("character mismatch (new: "
+ new_chars[i]
+ " != old: "
--- 700,704 ----
for (int i = 0; i < offset; i++)
if (new_chars[i] != buffer[i])
! throw new EncodingChangeException ("character mismatch (new: "
+ new_chars[i]
+ " != old: "
***************
*** 700,704 ****
+ " to "
+ character_set
! + " at offset "
+ offset);
}
--- 708,712 ----
+ " to "
+ character_set
! + " at character offset "
+ offset);
}
|