[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Cursor.java,1.9,1.10 Lexer.java,1.10,1.11 Page.java,1.16,1.17 PageIndex.java,1.9,1.10 Source.java,1.10,1.11
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-09-29 22:02:39
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv32344/lexer Modified Files: Cursor.java Lexer.java Page.java PageIndex.java Source.java Log Message: Fix broken serializability. Index: Cursor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** Cursor.java 28 Sep 2003 15:33:57 -0000 1.9 --- Cursor.java 29 Sep 2003 00:00:38 -0000 1.10 *************** *** 33,36 **** --- 33,37 ---- package org.htmlparser.lexer; + import java.io.Serializable; import org.htmlparser.util.sort.Ordered; *************** *** 39,43 **** * This class remembers the page it came from and its position within the page. */ ! public class Cursor implements Ordered, Cloneable { /** --- 40,48 ---- * This class remembers the page it came from and its position within the page. */ ! public class Cursor ! implements ! Serializable, ! Ordered, ! Cloneable { /** Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Lexer.java 28 Sep 2003 15:33:57 -0000 1.10 --- Lexer.java 29 Sep 2003 00:00:38 -0000 1.11 *************** *** 34,37 **** --- 34,38 ---- import java.io.IOException; + import java.io.Serializable; import java.net.MalformedURLException; import java.net.URL; *************** *** 59,62 **** --- 60,64 ---- public class Lexer implements + Serializable, NodeFactory { *************** *** 75,78 **** --- 77,90 ---- */ protected NodeFactory mFactory; + + /** + * Creates a new instance of a Lexer. 
+ */ + public Lexer () + { + setPage (new Page ("")); + setCursor (new Cursor (getPage (), 0)); + setNodeFactory (this); + } /** Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** Page.java 28 Sep 2003 15:33:57 -0000 1.16 --- Page.java 29 Sep 2003 00:00:39 -0000 1.17 *************** *** 35,38 **** --- 35,39 ---- import java.io.*; import java.io.IOException; + import java.io.Serializable; import java.lang.reflect.*; import java.net.*; *************** *** 46,49 **** --- 47,52 ---- */ public class Page + implements + Serializable { /** *************** *** 75,79 **** * The connection this page is coming from or <code>null</code>. */ ! protected URLConnection mConnection; /** --- 78,82 ---- * The connection this page is coming from or <code>null</code>. */ ! protected transient URLConnection mConnection; /** *************** *** 95,98 **** --- 98,109 ---- /** + * Construct an empty page reading. + */ + public Page () + { + this (""); + } + + /** * Construct a page reading from a URL connection. * @param connection A fully conditioned connection. The connect() *************** *** 154,157 **** --- 165,257 ---- } + // + // Serialization support + // + + private void writeObject (ObjectOutputStream out) + throws + IOException + { + String href; + Source source; + PageIndex index; + + // two cases, reading from a URL and not + if (null != getConnection ()) + { + out.writeBoolean (true); + out.writeInt (mSource.offset ()); // need to preread this much + href = getUrl (); + out.writeObject (href); + setUrl (getConnection ().getURL ().toExternalForm ()); + source = getSource (); + mSource = null; // don't serialize the source if we can avoid it + index = mIndex; + mIndex = null; // will get recreated; valid for the new page anyway? 
+ out.defaultWriteObject (); + mSource = source; + mIndex = index; + } + else + { + out.writeBoolean (false); + href = getUrl (); + out.writeObject (href); + setUrl (null); // don't try and read a bogus URL + out.defaultWriteObject (); + setUrl (href); + } + } + + private void readObject (ObjectInputStream in) + throws + IOException, + ClassNotFoundException + { + boolean fromurl; + int offset; + String href; + URL url; + Cursor cursor; + + fromurl = in.readBoolean (); + if (fromurl) + { + offset = in.readInt (); + href = (String)in.readObject (); + in.defaultReadObject (); + // open the URL + if (null != getUrl ()) + { + url = new URL (getUrl ()); + try + { + setConnection (url.openConnection ()); + } + catch (ParserException pe) + { + throw new IOException (pe.getMessage ()); + } + } + cursor = new Cursor (this, 0); + for (int i = 0; i < offset; i++) + try + { + getCharacter (cursor); + } + catch (ParserException pe) + { + throw new IOException (pe.getMessage ()); + } + setUrl (href); + } + else + { + href = (String)in.readObject (); + in.defaultReadObject (); + setUrl (href); + } + } + /** * Reset the page by resetting the source of characters. 
*************** *** 189,193 **** - mUrl = null; mConnection = connection; try --- 289,292 ---- *************** *** 232,235 **** --- 331,335 ---- throw new ParserException (ioe.getMessage (), ioe); } + mUrl = connection.getURL ().toExternalForm (); mIndex = new PageIndex (this); } *************** *** 241,252 **** public String getUrl () { - URLConnection connection; - if (null == mUrl) - { - connection = getConnection (); - if (null != connection) - mUrl = connection.getURL ().toExternalForm (); - } - return (mUrl); } --- 341,344 ---- Index: PageIndex.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** PageIndex.java 22 Sep 2003 02:39:59 -0000 1.9 --- PageIndex.java 29 Sep 2003 00:00:39 -0000 1.10 *************** *** 33,36 **** --- 33,37 ---- package org.htmlparser.lexer; + import java.io.Serializable; import org.htmlparser.util.sort.Ordered; import org.htmlparser.util.sort.Sort; *************** *** 46,50 **** * does not incur the overhead of an <code>Integer</code> object per element. */ ! public class PageIndex implements Sortable { /** --- 47,54 ---- * does not incur the overhead of an <code>Integer</code> object per element. */ ! public class PageIndex ! implements ! Serializable, ! 
Sortable { /** Index: Source.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** Source.java 28 Sep 2003 15:33:57 -0000 1.10 --- Source.java 29 Sep 2003 00:00:39 -0000 1.11 *************** *** 29,36 **** --- 29,40 ---- package org.htmlparser.lexer; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; + import java.io.ObjectInputStream; + import java.io.ObjectOutputStream; import java.io.Reader; + import java.io.Serializable; import java.io.UnsupportedEncodingException; *************** *** 46,50 **** * */ ! public class Source extends Reader { /** --- 50,58 ---- * */ ! public class Source ! extends ! Reader ! implements ! Serializable { /** *************** *** 61,65 **** * The stream of bytes. */ ! protected InputStream mStream; /** --- 69,73 ---- * The stream of bytes. */ ! protected transient InputStream mStream; /** *************** *** 71,75 **** * The converter from bytes to characters. */ ! protected InputStreamReader mReader; /** --- 79,83 ---- * The converter from bytes to characters. */ ! 
protected transient InputStreamReader mReader; /** *************** *** 143,146 **** --- 151,189 ---- } + // + // Serialization support + // + + private void writeObject (ObjectOutputStream out) + throws + IOException + { + int offset; + char[] buffer; + + if (null != mStream) + { + // remember the offset, drain the input stream, restore the offset + offset = mOffset; + buffer = new char[4096]; + while (-1 != read (buffer)) + ; + mOffset = offset; + } + + out.defaultWriteObject (); + } + + private void readObject (ObjectInputStream in) + throws + IOException, + ClassNotFoundException + { + in.defaultReadObject (); + if (null != mBuffer) // buffer is null when destroy's been called + // pretend we're open, mStream goes null when exhausted + mStream = new ByteArrayInputStream (new byte[0]); + } + /** * Get the input stream being used. *************** *** 421,424 **** --- 464,476 ---- mOffset = 0; mMark = -1; + } + + /** + * Get the position (in characters). + * @return The number of characters that have been read. + */ + public int offset () + { + return (mOffset); } |