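[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Cursor.java,1.19,1.20 InputStreamSource.java,1.6,1.7 Lexer.java,1.38,1.39 Page.java,1.49,1.50 PageAttribute.java,1.3,1.4 PageIndex.java,1.17,1.18 Source.java,1.19,1.20 Stream.java,1.13,1.14 StringSource.java,1.3,1.4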
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2005-05-15 11:49:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/lexer Modified Files: Cursor.java InputStreamSource.java Lexer.java Page.java PageAttribute.java PageIndex.java Source.java Stream.java StringSource.java Log Message: Documentation revamp part four. Remove some checkstyle warnings. Index: PageIndex.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** PageIndex.java 1 Aug 2004 02:16:04 -0000 1.17 --- PageIndex.java 15 May 2005 11:49:04 -0000 1.18 *************** *** 330,334 **** if ((index >= capacity ()) || (size () == capacity ())) { // allocate more space ! int new_values[] = new int[Math.max (capacity () + mIncrement, index + 1)]; mIncrement *= 2; if (index < capacity ()) --- 330,334 ---- if ((index >= capacity ()) || (size () == capacity ())) { // allocate more space ! int[] new_values = new int[Math.max (capacity () + mIncrement, index + 1)]; mIncrement *= 2; if (index < capacity ()) Index: StringSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/StringSource.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** StringSource.java 12 Apr 2005 11:27:41 -0000 1.3 --- StringSource.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 273,277 **** ret = n; } ! return (ret); } --- 273,277 ---- ret = n; } ! return (ret); } *************** *** 362,366 **** ret = mString.substring (offset, offset + length); } ! return (ret); } --- 362,366 ---- ret = mString.substring (offset, offset + length); } ! 
return (ret); } Index: Stream.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Stream.java 12 Apr 2005 11:27:41 -0000 1.13 --- Stream.java 15 May 2005 11:49:04 -0000 1.14 *************** *** 29,33 **** import java.io.IOException; import java.io.InputStream; - import java.lang.Runnable; /** --- 29,32 ---- Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** Lexer.java 12 Apr 2005 11:27:41 -0000 1.38 --- Lexer.java 15 May 2005 11:49:04 -0000 1.39 *************** *** 27,31 **** package org.htmlparser.lexer; - import java.io.IOException; import java.io.Serializable; import java.net.MalformedURLException; --- 27,30 ---- *************** *** 77,86 **** /** * Line number to trigger on. ! * This is tested on each <code>nextNode()</code> call, as an aid to debugging. ! * Alter this value and set a breakpoint on the line after the test. ! * Remember, these line numbers are zero based, while most editors are one based. * @see #nextNode ! */ ! static protected int mDebugLineTrigger = -1; /** --- 76,86 ---- /** * Line number to trigger on. ! * This is tested on each <code>nextNode()</code> call, as a debugging aid. ! * Alter this value and set a breakpoint on the guarded statement. ! * Remember, these line numbers are zero based, while most editors are ! * one based. * @see #nextNode ! */ ! protected static int mDebugLineTrigger = -1; /** *************** *** 238,242 **** * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the underlying page. 
*/ public Node nextNode () --- 238,243 ---- * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the ! * underlying page. */ public Node nextNode () *************** *** 252,256 **** * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the underlying page. */ public Node nextNode (boolean quotesmart) --- 253,258 ---- * @return A Remark, Text or Tag, or <code>null</code> if no * more lexemes are present. ! * @exception ParserException If there is a problem with the ! * underlying page. */ public Node nextNode (boolean quotesmart) *************** *** 268,272 **** int lineno = page.row (mCursor); if (mDebugLineTrigger < lineno) ! mDebugLineTrigger = lineno + 1; // trigger on subsequent lines too } start = mCursor.getPosition (); --- 270,274 ---- int lineno = page.row (mCursor); if (mDebugLineTrigger < lineno) ! mDebugLineTrigger = lineno + 1; // trigger on next line too } start = mCursor.getPosition (); *************** *** 302,311 **** else { ! mCursor.retreat (); // remark and tag need this character if ('-' == ch) ret = parseRemark (start, quotesmart); else { ! mCursor.retreat (); // tag needs the previous one too ret = parseTag (start); } --- 304,313 ---- else { ! mCursor.retreat (); // remark/tag need this char if ('-' == ch) ret = parseRemark (start, quotesmart); else { ! mCursor.retreat (); // tag needs prior one too ret = parseTag (start); } *************** *** 365,369 **** break; default: ! throw new IllegalStateException ("how the fuck did we get in state " + state); } } --- 367,371 ---- break; default: ! throw new IllegalStateException ("state " + state); } } *************** *** 416,430 **** mCursor.retreat (); } ! else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch))) quote = ch; // enter quoted state ! 
// patch contributed by Gernot Fricke to handle escaped closing quote else if (quotesmart && (0 != quote) && ('\\' == ch)) { ! ch = mPage.getCharacter (mCursor); //try to consume escaped character if ((Page.EOF != ch) && ('\\' != ch) // escaped backslash ! && (ch != quote)) // escaped quote character // ( reflects ["] or ['] whichever opened the quotation) ! mCursor.retreat(); // unconsume char if character was not an escapable char. } else if (quotesmart && (ch == quote)) --- 418,433 ---- mCursor.retreat (); } ! else if (quotesmart && (0 == quote) ! && (('\'' == ch) || ('"' == ch))) quote = ch; // enter quoted state ! // patch from Gernot Fricke to handle escaped closing quote else if (quotesmart && (0 != quote) && ('\\' == ch)) { ! ch = mPage.getCharacter (mCursor); // try to consume escape if ((Page.EOF != ch) && ('\\' != ch) // escaped backslash ! && (ch != quote)) // escaped quote character // ( reflects ["] or ['] whichever opened the quotation) ! mCursor.retreat(); // unconsume char if char not an escape } else if (quotesmart && (ch == quote)) *************** *** 432,436 **** else if (quotesmart && (0 == quote) && (ch == '/')) { ! // handle multiline and double slash comments (with a quote) in script like: // I can't handle single quotations. ch = mPage.getCharacter (mCursor); --- 435,440 ---- else if (quotesmart && (0 == quote) && (ch == '/')) { ! // handle multiline and double slash comments (with a quote) ! // in script like: // I can't handle single quotations. ch = mPage.getCharacter (mCursor); *************** *** 465,469 **** done = true; // the order of these tests might be optimized for speed: ! else if ('/' == ch || Character.isLetter (ch) || '!' == ch || '%' == ch) { done = true; --- 469,474 ---- done = true; // the order of these tests might be optimized for speed: ! else if ('/' == ch || Character.isLetter (ch) ! || '!' == ch || '%' == ch) { done = true; *************** *** 486,490 **** * @param start The starting point of the node. 
* @param end The ending point of the node. ! * @exception ParserException If the nodefactory creation of the string node fails. * @return The new Text node. */ --- 491,496 ---- * @param start The starting point of the node. * @param end The ending point of the node. ! * @exception ParserException If the nodefactory creation of the text ! * node fails. * @return The new Text node. */ *************** *** 498,539 **** length = end - start; if (0 != length) ! { // got some characters ! ret = getNodeFactory ().createStringNode (this.getPage (), start, end); ! } else ret = null; ! return (ret); } private void whitespace (Vector attributes, int[] bookmarks) { if (bookmarks[1] > bookmarks[0]) ! attributes.addElement (new PageAttribute (mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); } private void standalone (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); } private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); } private void naked (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[3], bookmarks[4], (char)0)); } private void single_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, bookmarks[5], '\'')); } private void double_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, bookmarks[6], '"')); } --- 504,584 ---- length = end - start; if (0 != length) ! // got some characters ! ret = getNodeFactory ().createStringNode ( ! this.getPage (), start, end); else ret = null; ! return (ret); } + /** + * Generate a whitespace 'attribute', + * @param attributes The list so far. + * @param bookmarks The array of positions. 
+ */ private void whitespace (Vector attributes, int[] bookmarks) { if (bookmarks[1] > bookmarks[0]) ! attributes.addElement (new PageAttribute ( ! mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0)); } + /** + * Generate a standalone attribute -- font. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void standalone (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0)); } + /** + * Generate an empty attribute -- color=. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void empty (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0)); } + /** + * Generate an unquoted attribute -- size=1. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void naked (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[3], ! bookmarks[4], (char)0)); } + /** + * Generate an single quoted attribute -- width='100%'. + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void single_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, ! bookmarks[5], '\'')); } + /** + * Generate an double quoted attribute -- CONTENT="Test Development". + * @param attributes The list so far. + * @param bookmarks The array of positions. + */ private void double_quote (Vector attributes, int[] bookmarks) { ! attributes.addElement (new PageAttribute ( ! mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, ! bookmarks[6], '"')); } *************** *** 565,569 **** * vice versa. 
Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). ! * For doublequotes authors can also use the character entity reference &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters --- 610,615 ---- * vice versa. Authors may also use numeric character references to * represent double quotes (&#34;) and single quotes (&#39;). ! * For doublequotes authors can also use the character entity reference ! * &quot;.<p> * In certain cases, authors may specify the value of an attribute without * any quotation marks. The attribute value may only contain letters *************** *** 574,579 **** * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each ! * attribute in the reference manual indicates whether its value is case-insensitive.<p> ! * All the attributes defined by this specification are listed in the attribute index.<p> * </cite> * <p> --- 620,627 ---- * Attribute names are always case-insensitive.<p> * Attribute values are generally case-insensitive. The definition of each ! * attribute in the reference manual indicates whether its value is ! * case-insensitive.<p> ! * All the attributes defined by this specification are listed in the ! * attribute index.<p> * </cite> * <p> Index: InputStreamSource.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** InputStreamSource.java 12 Apr 2005 11:27:41 -0000 1.6 --- InputStreamSource.java 15 May 2005 11:49:04 -0000 1.7 *************** *** 47,51 **** /** * An initial buffer size. ! * Has a default value of 16384. */ public static int BUFFER_SIZE = 16384; --- 47,51 ---- /** * An initial buffer size. ! 
* Has a default value of {@value}. */ public static int BUFFER_SIZE = 16384; *************** *** 70,84 **** * The characters read so far. */ ! public /*volatile*/ char[] mBuffer; /** * The number of valid bytes in the buffer. */ ! public /*volatile*/ int mLevel; /** * The offset of the next byte returned by read(). */ ! public /*volatile*/ int mOffset; /** --- 70,84 ---- * The characters read so far. */ ! protected char[] mBuffer; /** * The number of valid bytes in the buffer. */ ! protected int mLevel; /** * The offset of the next byte returned by read(). */ ! protected int mOffset; /** *************** *** 90,94 **** * Create a source of characters using the default character set. * @param stream The stream of bytes to use. ! * @exception UnsupportedEncodingException If the default character set is unsupported. */ public InputStreamSource (InputStream stream) --- 90,95 ---- * Create a source of characters using the default character set. * @param stream The stream of bytes to use. ! * @exception UnsupportedEncodingException If the default character set ! * is unsupported. */ public InputStreamSource (InputStream stream) *************** *** 103,107 **** * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @exception UnsupportedEncodingException If the character set is unsupported. */ public InputStreamSource (InputStream stream, String charset) --- 104,109 ---- * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @exception UnsupportedEncodingException If the character set ! * is unsupported. */ public InputStreamSource (InputStream stream, String charset) *************** *** 116,123 **** * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @param buffer_size The initial character buffer size. ! * @exception UnsupportedEncodingException If the character set is unsupported. */ ! 
public InputStreamSource (InputStream stream, String charset, int buffer_size) throws UnsupportedEncodingException --- 118,126 ---- * @param stream The stream of bytes to use. * @param charset The character set used in encoding the stream. ! * @param size The initial character buffer size. ! * @exception UnsupportedEncodingException If the character set ! * is unsupported. */ ! public InputStreamSource (InputStream stream, String charset, int size) throws UnsupportedEncodingException *************** *** 151,155 **** mReader = new InputStreamReader (stream, charset); } ! mBuffer = new char[buffer_size]; mLevel = 0; mOffset = 0; --- 154,158 ---- mReader = new InputStreamReader (stream, charset); } ! mBuffer = new char[size]; mLevel = 0; mOffset = 0; *************** *** 161,164 **** --- 164,172 ---- // + /** + * Serialization support. + * @param out Where to write this object. + * @exception IOException If serialization has a problem. + */ private void writeObject (ObjectOutputStream out) throws *************** *** 177,184 **** mOffset = offset; } ! out.defaultWriteObject (); } private void readObject (ObjectInputStream in) throws --- 185,197 ---- mOffset = offset; } ! out.defaultWriteObject (); } + /** + * Deserialization support. + * @param in Where to read this object from. + * @exception IOException If deserialization has a problem. + */ private void readObject (ObjectInputStream in) throws *************** *** 222,226 **** * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. --- 235,239 ---- * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. 
Index: Source.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** Source.java 12 Apr 2005 11:27:41 -0000 1.19 --- Source.java 15 May 2005 11:49:04 -0000 1.20 *************** *** 43,47 **** * <li>the fetching of bytes may be asynchronous</li> * <li>the character set may be changed, which resets the input stream</li> ! * <li>characters may be requested more than once, so in general they will be buffered</li> * </ul> */ --- 43,48 ---- * <li>the fetching of bytes may be asynchronous</li> * <li>the character set may be changed, which resets the input stream</li> ! * <li>characters may be requested more than once, so in general they ! * will be buffered</li> * </ul> */ Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** Page.java 12 Apr 2005 11:27:41 -0000 1.49 --- Page.java 15 May 2005 11:49:04 -0000 1.50 *************** *** 57,62 **** * The default charset. * This should be <code>{@value}</code>, ! * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) section 3.7.1 ! * Another alias is "8859_1". */ public static final String DEFAULT_CHARSET = "ISO-8859-1"; --- 57,63 ---- * The default charset. * This should be <code>{@value}</code>, ! * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) ! * section 3.7.1 ! * <p>Another alias is "8859_1". */ public static final String DEFAULT_CHARSET = "ISO-8859-1"; *************** *** 95,99 **** */ protected PageIndex mIndex; ! /** * The connection this page is coming from or <code>null</code>. --- 96,100 ---- */ protected PageIndex mIndex; ! /** * The connection this page is coming from or <code>null</code>. 
*************** *** 104,108 **** * Connection control (proxy, cookies, authorization). */ ! public static ConnectionManager mConnectionManager = new ConnectionManager (); /** --- 105,110 ---- * Connection control (proxy, cookies, authorization). */ ! protected static ConnectionManager mConnectionManager = ! new ConnectionManager (); /** *************** *** 138,142 **** * @param charset The encoding used. * If null, defaults to the <code>DEFAULT_CHARSET</code>. ! * @exception UnsupportedEncodingException If the given charset is not supported. */ public Page (InputStream stream, String charset) --- 140,145 ---- * @param charset The encoding used. * If null, defaults to the <code>DEFAULT_CHARSET</code>. ! * @exception UnsupportedEncodingException If the given charset ! * is not supported. */ public Page (InputStream stream, String charset) *************** *** 186,189 **** --- 189,207 ---- } + /** + * Construct a page from a source. + * @param source The source of characters. + */ + public Page (Source source) + { + if (null == source) + throw new IllegalArgumentException ("source cannot be null"); + mSource = source; + mIndex = new PageIndex (this); + mConnection = null; + mUrl = null; + mBaseUrl = null; + } + // // static methods *************** *** 216,220 **** * which is applicable both to the HTTP header field Content-Type and * the meta tag http-equiv="Content-Type". ! * Note this method also handles non-compliant quoted charset directives such as: * <pre> * text/html; charset="UTF-8" --- 234,239 ---- * which is applicable both to the HTTP header field Content-Type and * the meta tag http-equiv="Content-Type". ! * Note this method also handles non-compliant quoted charset directives ! * such as: * <pre> * text/html; charset="UTF-8" *************** *** 245,249 **** if (index != -1) { ! content = content.substring (index + CHARSET_STRING.length ()).trim (); if (content.startsWith ("=")) { --- 264,269 ---- if (index != -1) { ! content = content.substring (index + ! 
CHARSET_STRING.length ()).trim (); if (content.startsWith ("=")) { *************** *** 254,262 **** //remove any double quotes from around charset string ! if (content.startsWith ("\"") && content.endsWith ("\"") && (1 < content.length ())) content = content.substring (1, content.length () - 1); //remove any single quote from around charset string ! if (content.startsWith ("'") && content.endsWith ("'") && (1 < content.length ())) content = content.substring (1, content.length () - 1); --- 274,284 ---- //remove any double quotes from around charset string ! if (content.startsWith ("\"") && content.endsWith ("\"") ! && (1 < content.length ())) content = content.substring (1, content.length () - 1); //remove any single quote from around charset string ! if (content.startsWith ("'") && content.endsWith ("'") ! && (1 < content.length ())) content = content.substring (1, content.length () - 1); *************** *** 264,268 **** // Charset names are not case-sensitive; ! // that is, case is always ignored when comparing charset names. // if (!ret.equalsIgnoreCase (content)) // { --- 286,291 ---- // Charset names are not case-sensitive; ! // that is, case is always ignored when comparing ! // charset names. // if (!ret.equalsIgnoreCase (content)) // { *************** *** 287,294 **** * in that case the default is always returned. * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. * @return The character set name. */ ! public static String findCharset (String name, String _default) { String ret; --- 310,317 ---- * in that case the default is always returned. * @param name The name to look up. One of the aliases for a character set. ! * @param fallback The name to return if the lookup fails. * @return The character set name. */ ! 
public static String findCharset (String name, String fallback) { String ret; *************** *** 327,336 **** // and java.nio.charset.UnsupportedCharsetException // return the default ! ret = _default; System.out.println ( "unable to determine cannonical charset name for " + name + " - using " ! + _default); } --- 350,359 ---- // and java.nio.charset.UnsupportedCharsetException // return the default ! ret = fallback; System.out.println ( "unable to determine cannonical charset name for " + name + " - using " ! + fallback); } *************** *** 348,351 **** --- 371,375 ---- * disconnected, the underling source is saved. * @param out The object stream to store this object in. + * @exception IOException If there is a serialization problem. */ private void writeObject (ObjectOutputStream out) *************** *** 388,391 **** --- 412,419 ---- * For details see <code>writeObject()</code>. * @param in The object stream to decode. + * @exception IOException If there is a deserialization problem with + * the stream. + * @exception ClassNotFoundException If the deserialized class can't be + * located with the current classpath and class loader. */ private void readObject (ObjectInputStream in) *************** *** 461,471 **** * Clean up this page, releasing resources. * Calls <code>close()</code>. ! * @exception Throwable if <code>close()</code> throws an <code>IOException</code>. */ ! protected void finalize () throws Throwable { close (); } ! /** * Get the connection, if any. --- 489,502 ---- * Clean up this page, releasing resources. * Calls <code>close()</code>. ! * @exception Throwable if <code>close()</code> throws an ! * <code>IOException</code>. */ ! protected void finalize () ! throws ! Throwable { close (); } ! /** * Get the connection, if any. *************** *** 504,512 **** catch (UnknownHostException uhe) { ! throw new ParserException ("Connect to " + mConnection.getURL ().toExternalForm () + " failed.", uhe); } catch (IOException ioe) { ! 
throw new ParserException ("Exception connecting to " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe); } type = getContentType (); --- 535,546 ---- catch (UnknownHostException uhe) { ! throw new ParserException ("Connect to " ! + mConnection.getURL ().toExternalForm () + " failed.", uhe); } catch (IOException ioe) { ! throw new ParserException ("Exception connecting to " ! + mConnection.getURL ().toExternalForm () ! + " (" + ioe.getMessage () + ").", ioe); } type = getContentType (); *************** *** 515,525 **** { contentEncoding = connection.getContentEncoding(); ! if ((null != contentEncoding) && (-1 != contentEncoding.indexOf ("gzip"))) { ! stream = new Stream (new GZIPInputStream (getConnection ().getInputStream ())); } ! else if ((null != contentEncoding) && (-1 != contentEncoding.indexOf ("deflate"))) { ! stream = new Stream (new InflaterInputStream (getConnection ().getInputStream ())); } else --- 549,563 ---- { contentEncoding = connection.getContentEncoding(); ! if ((null != contentEncoding) ! && (-1 != contentEncoding.indexOf ("gzip"))) { ! stream = new Stream (new GZIPInputStream ( ! getConnection ().getInputStream ())); } ! else if ((null != contentEncoding) ! && (-1 != contentEncoding.indexOf ("deflate"))) { ! stream = new Stream (new InflaterInputStream ( ! getConnection ().getInputStream ())); } else *************** *** 549,553 **** catch (IOException ioe) { ! throw new ParserException ("Exception getting input stream from " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe); } mUrl = connection.getURL ().toExternalForm (); --- 587,593 ---- catch (IOException ioe) { ! throw new ParserException ("Exception getting input stream from " ! + mConnection.getURL ().toExternalForm () ! + " (" + ioe.getMessage () + ").", ioe); } mUrl = connection.getURL ().toExternalForm (); *************** *** 596,600 **** mBaseUrl = url; } ! /** * Get the source this page is reading from. 
--- 636,640 ---- mBaseUrl = url; } ! /** * Get the source this page is reading from. *************** *** 629,634 **** /** ! * Read the character at the cursor position. ! * The cursor position can be behind or equal to the current source position. * Returns end of lines (EOL) as \n, by converting \r and \r\n to \n, * and updates the end-of-line index accordingly --- 669,675 ---- /** ! * Read the character at the given cursor position. ! * The cursor position can be only behind or equal to the ! * current source position. * Returns end of lines (EOL) as \n, by converting \r and \r\n to \n, * and updates the end-of-line index accordingly *************** *** 651,655 **** if (mSource.offset () < i) // hmmm, we could skip ahead, but then what about the EOL index ! throw new ParserException ("attempt to read future characters from source"); else if (mSource.offset () == i) try --- 692,698 ---- if (mSource.offset () < i) // hmmm, we could skip ahead, but then what about the EOL index ! throw new ParserException ( ! "attempt to read future characters from source " ! + i + " > " + mSource.offset ()); else if (mSource.offset () == i) try *************** *** 761,765 **** * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. --- 804,808 ---- * read characters up to the current position. * If a difference is encountered, or some other problem occurs, ! * an exception is thrown. * @param character_set The character set to use to convert bytes into * characters. *************** *** 921,925 **** { String ret; ! try { --- 964,968 ---- { String ret; ! try { *************** *** 936,940 **** + ioe.getMessage ()); } ! return (ret); } --- 979,983 ---- + ioe.getMessage ()); } ! return (ret); } *************** *** 957,961 **** if ((mSource.offset () < start) || (mSource.offset () < end)) ! 
throw new IllegalArgumentException ("attempt to extract future characters from source"); if (end < start) { --- 1000,1006 ---- if ((mSource.offset () < start) || (mSource.offset () < end)) ! throw new IllegalArgumentException ( ! "attempt to extract future characters from source" ! + start + "|" + end + " > " + mSource.offset ()); if (end < start) { Index: PageAttribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageAttribute.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** PageAttribute.java 12 Apr 2005 11:27:41 -0000 1.3 --- PageAttribute.java 15 May 2005 11:49:04 -0000 1.4 *************** *** 28,32 **** import org.htmlparser.Attribute; - import org.htmlparser.lexer.Page; /** --- 28,31 ---- Index: Cursor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** Cursor.java 12 Apr 2005 11:27:41 -0000 1.19 --- Cursor.java 15 May 2005 11:49:04 -0000 1.20 *************** *** 167,170 **** } } - - \ No newline at end of file --- 167,168 ---- |