[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Page.java,1.24,1.25
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-11-08 21:30:59
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv18855/src/org/htmlparser/lexer Modified Files: Page.java Log Message: Implement generic node filtering. Added the NodeFilter interface and the filter package. Sideline tag-specific scanners; tags now use only one scanner of each type, TagScanner or CompositeTagScanner (except for ScriptScanner). Obviated PeekingIterator by moving the META tag semantics to doSemanticAction, which is much simpler; the old IteratorImpl is now PeekingIteratorImpl, but deprecated. Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** Page.java 4 Nov 2003 01:25:02 -0000 1.24 --- Page.java 8 Nov 2003 21:30:56 -0000 1.25 *************** *** 645,652 **** /** ! * Resets this page and begins reading from the source with the ! * given character set. * @param character_set The character set to use to convert bytes into * characters. */ public void setEncoding (String character_set) --- 645,667 ---- /** ! * Begins reading from the source with the given character set. ! * If the current encoding is the same as the requested encoding, ! * this method is a no-op. Otherwise any subsequent characters read from ! * this page will have been decoded using the given character set.<p> ! * Some magic happens here to obtain this result if characters have already ! * been consumed from this page. ! * Since a Reader cannot be dynamically altered to use a different character ! * set, the underlying stream is reset, a new Source is constructed ! * and a comparison made of the characters read so far with the newly ! * read characters up to the current position. ! * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. 
+ * @exception ParserException If a character mismatch occurs between + * characters already provided and those that would have been returned + * had the new character set been in effect from the beginning. An + * exception is also thrown if the underlying stream won't put up with + * these shenanigans. */ public void setEncoding (String character_set) *************** *** 655,672 **** { InputStream stream; ! ! stream = getSource ().getStream (); ! try { ! stream.reset (); ! if (!getEncoding ().equals (character_set)) { mSource = new Source (stream, character_set); ! mIndex = new PageIndex (this); } - } - catch (IOException ioe) - { - throw new ParserException (ioe.getMessage (), ioe); } } --- 670,705 ---- { InputStream stream; ! char[] buffer; ! int offset; ! char[] new_chars; ! ! if (!getEncoding ().equals (character_set)) { ! stream = getSource ().getStream (); ! try { + buffer = mSource.mBuffer; + offset = mSource.mOffset; + stream.reset (); mSource = new Source (stream, character_set); ! if (0 != offset) ! { ! new_chars = new char[offset]; ! if (offset != mSource.read (new_chars)) ! throw new ParserException ("reset stream failed"); ! for (int i = 0; i < offset; i++) ! if (new_chars[i] != buffer[i]) ! throw new ParserException ("character mismatch (new: " ! + new_chars[i] ! + " != old: " ! + buffer[i] ! + ") for encoding at offset " ! + offset); ! } ! } ! catch (IOException ioe) ! { ! throw new ParserException (ioe.getMessage (), ioe); } } } |