[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Cursor.java,1.19,1.20 InputStreamSource.java,1.

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31674/src/org/htmlparser/lexer

Modified Files:
	Cursor.java InputStreamSource.java Lexer.java Page.java 
	PageAttribute.java PageIndex.java Source.java Stream.java 
	StringSource.java 
Log Message:
Documentation revamp part four.
Remove some checkstyle warnings.

Index: PageIndex.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** PageIndex.java	1 Aug 2004 02:16:04 -0000	1.17
--- PageIndex.java	15 May 2005 11:49:04 -0000	1.18
***************
*** 330,334 ****
          if ((index >= capacity ()) || (size () == capacity ()))
          {   // allocate more space
!             int new_values[] = new int[Math.max (capacity () + mIncrement, index + 1)];
              mIncrement *= 2;
              if (index < capacity ())
--- 330,334 ----
          if ((index >= capacity ()) || (size () == capacity ()))
          {   // allocate more space
!             int[] new_values = new int[Math.max (capacity () + mIncrement, index + 1)];
              mIncrement *= 2;
              if (index < capacity ())

Index: StringSource.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/StringSource.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** StringSource.java	12 Apr 2005 11:27:41 -0000	1.3
--- StringSource.java	15 May 2005 11:49:04 -0000	1.4
***************
*** 273,277 ****
              ret = n;
          }
!         
          return (ret);
      }
--- 273,277 ----
              ret = n;
          }
! 
          return (ret);
      }
***************
*** 362,366 ****
                  ret = mString.substring (offset, offset + length);
          }
!         
          return (ret);
      }
--- 362,366 ----
                  ret = mString.substring (offset, offset + length);
          }
! 
          return (ret);
      }

Index: Stream.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -d -r1.13 -r1.14
*** Stream.java	12 Apr 2005 11:27:41 -0000	1.13
--- Stream.java	15 May 2005 11:49:04 -0000	1.14
***************
*** 29,33 ****
  import java.io.IOException;
  import java.io.InputStream;
- import java.lang.Runnable;
  
  /**
--- 29,32 ----

Index: Lexer.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -d -r1.38 -r1.39
*** Lexer.java	12 Apr 2005 11:27:41 -0000	1.38
--- Lexer.java	15 May 2005 11:49:04 -0000	1.39
***************
*** 27,31 ****
  package org.htmlparser.lexer;
  
- import java.io.IOException;
  import java.io.Serializable;
  import java.net.MalformedURLException;
--- 27,30 ----
***************
*** 77,86 ****
      /**
       * Line number to trigger on.
!      * This is tested on each <code>nextNode()</code> call, as an aid to debugging.
!      * Alter this value and set a breakpoint on the line after the test.
!      * Remember, these line numbers are zero based, while most editors are one based.
       * @see #nextNode
!      */ 
!     static protected int mDebugLineTrigger = -1;
  
      /**
--- 76,86 ----
      /**
       * Line number to trigger on.
!      * This is tested on each <code>nextNode()</code> call, as a debugging aid.
!      * Alter this value and set a breakpoint on the guarded statement.
!      * Remember, these line numbers are zero based, while most editors are
!      * one based.
       * @see #nextNode
!      */
!     protected static int mDebugLineTrigger = -1;
  
      /**
***************
*** 238,242 ****
       * @return A Remark, Text or Tag, or <code>null</code> if no
       * more lexemes are present.
!      * @exception ParserException If there is a problem with the underlying page.
       */
      public Node nextNode ()
--- 238,243 ----
       * @return A Remark, Text or Tag, or <code>null</code> if no
       * more lexemes are present.
!      * @exception ParserException If there is a problem with the
!      * underlying page.
       */
      public Node nextNode ()
***************
*** 252,256 ****
       * @return A Remark, Text or Tag, or <code>null</code> if no
       * more lexemes are present.
!      * @exception ParserException If there is a problem with the underlying page.
       */
      public Node nextNode (boolean quotesmart)
--- 253,258 ----
       * @return A Remark, Text or Tag, or <code>null</code> if no
       * more lexemes are present.
!      * @exception ParserException If there is a problem with the
!      * underlying page.
       */
      public Node nextNode (boolean quotesmart)
***************
*** 268,272 ****
              int lineno = page.row (mCursor);
              if (mDebugLineTrigger < lineno)
!                 mDebugLineTrigger = lineno + 1; // trigger on subsequent lines too
          }
          start = mCursor.getPosition ();
--- 270,274 ----
              int lineno = page.row (mCursor);
              if (mDebugLineTrigger < lineno)
!                 mDebugLineTrigger = lineno + 1; // trigger on next line too
          }
          start = mCursor.getPosition ();
***************
*** 302,311 ****
                          else
                          {
!                             mCursor.retreat (); // remark and tag need this character
                              if ('-' == ch)
                                  ret = parseRemark (start, quotesmart);
                              else
                              {
!                                 mCursor.retreat (); // tag needs the previous one too
                                  ret = parseTag (start);
                              }
--- 304,313 ----
                          else
                          {
!                             mCursor.retreat (); // remark/tag need this char
                              if ('-' == ch)
                                  ret = parseRemark (start, quotesmart);
                              else
                              {
!                                 mCursor.retreat (); // tag needs prior one too
                                  ret = parseTag (start);
                              }
***************
*** 365,369 ****
                          break;
                      default:
!                         throw new IllegalStateException ("how the fuck did we get in state " + state);
                  }
          }
--- 367,371 ----
                          break;
                      default:
!                         throw new IllegalStateException ("state " + state);
                  }
          }
***************
*** 416,430 ****
                      mCursor.retreat ();
              }
!             else if (quotesmart && (0 == quote) && (('\'' == ch) || ('"' == ch)))
                  quote = ch; // enter quoted state
!             // patch contributed by Gernot Fricke to handle escaped closing quote
              else if (quotesmart && (0 != quote) && ('\\' == ch))
              {
!                 ch = mPage.getCharacter (mCursor); //try to consume escaped character
                  if ((Page.EOF != ch)
                      && ('\\' != ch) // escaped backslash
!                     && (ch != quote)) // escaped quote character 
                         // ( reflects ["] or [']  whichever opened the quotation)
!                     mCursor.retreat(); // unconsume char if character was not an escapable char.
              }
              else if (quotesmart && (ch == quote))
--- 418,433 ----
                      mCursor.retreat ();
              }
!             else if (quotesmart && (0 == quote)
!                 && (('\'' == ch) || ('"' == ch)))
                  quote = ch; // enter quoted state
!             // patch from Gernot Fricke to handle escaped closing quote
              else if (quotesmart && (0 != quote) && ('\\' == ch))
              {
!                 ch = mPage.getCharacter (mCursor); // try to consume escape
                  if ((Page.EOF != ch)
                      && ('\\' != ch) // escaped backslash
!                     && (ch != quote)) // escaped quote character
                         // ( reflects ["] or [']  whichever opened the quotation)
!                     mCursor.retreat(); // unconsume char if char not an escape
              }
              else if (quotesmart && (ch == quote))
***************
*** 432,436 ****
              else if (quotesmart && (0 == quote) && (ch == '/'))
              {
!                 // handle multiline and double slash comments (with a quote) in script like:
                  // I can't handle single quotations.
                  ch = mPage.getCharacter (mCursor);
--- 435,440 ----
              else if (quotesmart && (0 == quote) && (ch == '/'))
              {
!                 // handle multiline and double slash comments (with a quote)
!                 // in script like:
                  // I can't handle single quotations.
                  ch = mPage.getCharacter (mCursor);
***************
*** 465,469 ****
                      done = true;
                  // the order of these tests might be optimized for speed:
!                 else if ('/' == ch || Character.isLetter (ch) || '!' == ch || '%' == ch)
                  {
                      done = true;
--- 469,474 ----
                      done = true;
                  // the order of these tests might be optimized for speed:
!                 else if ('/' == ch || Character.isLetter (ch)
!                     || '!' == ch || '%' == ch)
                  {
                      done = true;
***************
*** 486,490 ****
       * @param start The starting point of the node.
       * @param end The ending point of the node.
!      * @exception ParserException If the nodefactory creation of the string node fails.
       * @return The new Text node.
       */
--- 491,496 ----
       * @param start The starting point of the node.
       * @param end The ending point of the node.
!      * @exception ParserException If the nodefactory creation of the text
!      * node fails.
       * @return The new Text node.
       */
***************
*** 498,539 ****
          length = end - start;
          if (0 != length)
!         {   // got some characters
!             ret = getNodeFactory ().createStringNode (this.getPage (), start, end);
!         }
          else
              ret = null;
!         
          return (ret);
      }
  
      private void whitespace (Vector attributes, int[] bookmarks)
      {
          if (bookmarks[1] > bookmarks[0])
!             attributes.addElement (new PageAttribute (mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0));
      }
  
      private void standalone (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0));
      }
  
      private void empty (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0));
      }
  
      private void naked (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[3], bookmarks[4], (char)0));
      }
  
      private void single_quote (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1, bookmarks[5], '\''));
      }
  
      private void double_quote (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1, bookmarks[6], '"'));
      }
  
--- 504,584 ----
          length = end - start;
          if (0 != length)
!             // got some characters
!             ret = getNodeFactory ().createStringNode (
!                 this.getPage (), start, end);
          else
              ret = null;
! 
          return (ret);
      }
  
+     /**
+      * Generate a whitespace 'attribute'.
+      * @param attributes The list so far.
+      * @param bookmarks The array of positions.
+      */
      private void whitespace (Vector attributes, int[] bookmarks)
      {
          if (bookmarks[1] > bookmarks[0])
!             attributes.addElement (new PageAttribute (
!                 mPage, -1, -1, bookmarks[0], bookmarks[1], (char)0));
      }
  
+     /**
+      * Generate a standalone attribute -- font.
+      * @param attributes The list so far.
+      * @param bookmarks The array of positions.
+      */
      private void standalone (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (
!             mPage, bookmarks[1], bookmarks[2], -1, -1, (char)0));
      }
  
+     /**
+      * Generate an empty attribute -- color=.
+      * @param attributes The list so far.
+      * @param bookmarks The array of positions.
+      */
      private void empty (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (
!             mPage, bookmarks[1], bookmarks[2], bookmarks[2] + 1, -1, (char)0));
      }
  
+     /**
+      * Generate an unquoted attribute -- size=1.
+      * @param attributes The list so far.
+      * @param bookmarks The array of positions.
+      */
      private void naked (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (
!             mPage, bookmarks[1], bookmarks[2], bookmarks[3],
!             bookmarks[4], (char)0));
      }
  
+     /**
+      * Generate a single quoted attribute -- width='100%'.
+      * @param attributes The list so far.
+      * @param bookmarks The array of positions.
+      */
      private void single_quote (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (
!             mPage, bookmarks[1], bookmarks[2], bookmarks[4] + 1,
!             bookmarks[5], '\''));
      }
  
+     /**
+      * Generate a double quoted attribute -- CONTENT="Test Development".
+      * @param attributes The list so far.
+      * @param bookmarks The array of positions.
+      */
      private void double_quote (Vector attributes, int[] bookmarks)
      {
!         attributes.addElement (new PageAttribute (
!             mPage, bookmarks[1], bookmarks[2], bookmarks[5] + 1,
!             bookmarks[6], '"'));
      }
  
***************
*** 565,569 ****
       * vice versa. Authors may also use numeric character references to
       * represent double quotes (&amp;#34;) and single quotes (&amp;#39;).
!      * For doublequotes authors can also use the character entity reference &amp;quot;.<p>
       * In certain cases, authors may specify the value of an attribute without
       * any quotation marks. The attribute value may only contain letters
--- 610,615 ----
       * vice versa. Authors may also use numeric character references to
       * represent double quotes (&amp;#34;) and single quotes (&amp;#39;).
!      * For doublequotes authors can also use the character entity reference
!      * &amp;quot;.<p>
       * In certain cases, authors may specify the value of an attribute without
       * any quotation marks. The attribute value may only contain letters
***************
*** 574,579 ****
       * Attribute names are always case-insensitive.<p>
       * Attribute values are generally case-insensitive. The definition of each
!      * attribute in the reference manual indicates whether its value is case-insensitive.<p>
!      * All the attributes defined by this specification are listed in the attribute index.<p>
       * </cite>
       * <p>
--- 620,627 ----
       * Attribute names are always case-insensitive.<p>
       * Attribute values are generally case-insensitive. The definition of each
!      * attribute in the reference manual indicates whether its value is
!      * case-insensitive.<p>
!      * All the attributes defined by this specification are listed in the
!      * attribute index.<p>
       * </cite>
       * <p>

Index: InputStreamSource.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/InputStreamSource.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** InputStreamSource.java	12 Apr 2005 11:27:41 -0000	1.6
--- InputStreamSource.java	15 May 2005 11:49:04 -0000	1.7
***************
*** 47,51 ****
      /**
       * An initial buffer size.
!      * Has a default value of 16384.
       */
      public static int BUFFER_SIZE = 16384;
--- 47,51 ----
      /**
       * An initial buffer size.
!      * Has a default value of {@value}.
       */
      public static int BUFFER_SIZE = 16384;
***************
*** 70,84 ****
       * The characters read so far.
       */
!     public /*volatile*/ char[] mBuffer;
  
      /**
       * The number of valid bytes in the buffer.
       */
!     public /*volatile*/ int mLevel;
  
      /**
       * The offset of the next byte returned by read().
       */
!     public /*volatile*/ int mOffset;
  
      /**
--- 70,84 ----
       * The characters read so far.
       */
!     protected char[] mBuffer;
  
      /**
       * The number of valid bytes in the buffer.
       */
!     protected int mLevel;
  
      /**
       * The offset of the next byte returned by read().
       */
!     protected int mOffset;
  
      /**
***************
*** 90,94 ****
       * Create a source of characters using the default character set.
       * @param stream The stream of bytes to use.
!      * @exception UnsupportedEncodingException If the default character set is unsupported.
       */
      public InputStreamSource (InputStream stream)
--- 90,95 ----
       * Create a source of characters using the default character set.
       * @param stream The stream of bytes to use.
!      * @exception UnsupportedEncodingException If the default character set
!      * is unsupported.
       */
      public InputStreamSource (InputStream stream)
***************
*** 103,107 ****
       * @param stream The stream of bytes to use.
       * @param charset The character set used in encoding the stream.
!      * @exception UnsupportedEncodingException If the character set is unsupported.
       */
      public InputStreamSource (InputStream stream, String charset)
--- 104,109 ----
       * @param stream The stream of bytes to use.
       * @param charset The character set used in encoding the stream.
!      * @exception UnsupportedEncodingException If the character set
!      * is unsupported.
       */
      public InputStreamSource (InputStream stream, String charset)
***************
*** 116,123 ****
       * @param stream The stream of bytes to use.
       * @param charset The character set used in encoding the stream.
!      * @param buffer_size The initial character buffer size.
!      * @exception UnsupportedEncodingException If the character set is unsupported.
       */
!     public InputStreamSource (InputStream stream, String charset, int buffer_size)
          throws
              UnsupportedEncodingException
--- 118,126 ----
       * @param stream The stream of bytes to use.
       * @param charset The character set used in encoding the stream.
!      * @param size The initial character buffer size.
!      * @exception UnsupportedEncodingException If the character set
!      * is unsupported.
       */
!     public InputStreamSource (InputStream stream, String charset, int size)
          throws
              UnsupportedEncodingException
***************
*** 151,155 ****
              mReader = new InputStreamReader (stream, charset);
          }
!         mBuffer = new char[buffer_size];
          mLevel = 0;
          mOffset = 0;
--- 154,158 ----
              mReader = new InputStreamReader (stream, charset);
          }
!         mBuffer = new char[size];
          mLevel = 0;
          mOffset = 0;
***************
*** 161,164 ****
--- 164,172 ----
      //
  
+     /**
+      * Serialization support.
+      * @param out Where to write this object.
+      * @exception IOException If serialization has a problem.
+      */
      private void writeObject (ObjectOutputStream out)
          throws
***************
*** 177,184 ****
              mOffset = offset;
          }
!         
          out.defaultWriteObject ();
      }
  
      private void readObject (ObjectInputStream in)
          throws
--- 185,197 ----
              mOffset = offset;
          }
! 
          out.defaultWriteObject ();
      }
  
+     /**
+      * Deserialization support.
+      * @param in Where to read this object from.
+      * @exception IOException If deserialization has a problem.
+      */
      private void readObject (ObjectInputStream in)
          throws
***************
*** 222,226 ****
       * read characters up to the current position.
       * If a difference is encountered, or some other problem occurs,
!      * an exception is thrown. 
       * @param character_set The character set to use to convert bytes into
       * characters.
--- 235,239 ----
       * read characters up to the current position.
       * If a difference is encountered, or some other problem occurs,
!      * an exception is thrown.
       * @param character_set The character set to use to convert bytes into
       * characters.

Index: Source.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** Source.java	12 Apr 2005 11:27:41 -0000	1.19
--- Source.java	15 May 2005 11:49:04 -0000	1.20
***************
*** 43,47 ****
   * <li>the fetching of bytes may be asynchronous</li>
   * <li>the character set may be changed, which resets the input stream</li>
!  * <li>characters may be requested more than once, so in general they will be buffered</li>
   * </ul>
   */
--- 43,48 ----
   * <li>the fetching of bytes may be asynchronous</li>
   * <li>the character set may be changed, which resets the input stream</li>
!  * <li>characters may be requested more than once, so in general they
!  * will be buffered</li>
   * </ul>
   */

Index: Page.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v
retrieving revision 1.49
retrieving revision 1.50
diff -C2 -d -r1.49 -r1.50
*** Page.java	12 Apr 2005 11:27:41 -0000	1.49
--- Page.java	15 May 2005 11:49:04 -0000	1.50
***************
*** 57,62 ****
       * The default charset.
       * This should be <code>{@value}</code>,
!      * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) section 3.7.1
!      * Another alias is "8859_1".
       */
      public static final String DEFAULT_CHARSET = "ISO-8859-1";
--- 57,63 ----
       * The default charset.
       * This should be <code>{@value}</code>,
!      * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616)
!      * section 3.7.1
!      * <p>Another alias is "8859_1".
       */
      public static final String DEFAULT_CHARSET = "ISO-8859-1";
***************
*** 95,99 ****
       */
      protected PageIndex mIndex;
!     
      /**
       * The connection this page is coming from or <code>null</code>.
--- 96,100 ----
       */
      protected PageIndex mIndex;
! 
      /**
       * The connection this page is coming from or <code>null</code>.
***************
*** 104,108 ****
       * Connection control (proxy, cookies, authorization).
       */
!     public static ConnectionManager mConnectionManager = new ConnectionManager ();
  
      /**
--- 105,110 ----
       * Connection control (proxy, cookies, authorization).
       */
!     protected static ConnectionManager mConnectionManager =
!         new ConnectionManager ();
  
      /**
***************
*** 138,142 ****
       * @param charset The encoding used.
       * If null, defaults to the <code>DEFAULT_CHARSET</code>.
!      * @exception UnsupportedEncodingException If the given charset is not supported.
       */
      public Page (InputStream stream, String charset)
--- 140,145 ----
       * @param charset The encoding used.
       * If null, defaults to the <code>DEFAULT_CHARSET</code>.
!      * @exception UnsupportedEncodingException If the given charset
!      * is not supported.
       */
      public Page (InputStream stream, String charset)
***************
*** 186,189 ****
--- 189,207 ----
      }
  
+     /**
+      * Construct a page from a source.
+      * @param source The source of characters.
+      */
+     public Page (Source source)
+     {
+         if (null == source)
+             throw new IllegalArgumentException ("source cannot be null");
+         mSource = source;
+         mIndex = new PageIndex (this);
+         mConnection = null;
+         mUrl = null;
+         mBaseUrl = null;
+     }
+ 
      //
      // static methods
***************
*** 216,220 ****
       * which is applicable both to the HTTP header field Content-Type and
       * the meta tag http-equiv="Content-Type".
!      * Note this method also handles non-compliant quoted charset directives such as:
       * <pre>
       * text/html; charset="UTF-8"
--- 234,239 ----
       * which is applicable both to the HTTP header field Content-Type and
       * the meta tag http-equiv="Content-Type".
!      * Note this method also handles non-compliant quoted charset directives
!      * such as:
       * <pre>
       * text/html; charset="UTF-8"
***************
*** 245,249 ****
              if (index != -1)
              {
!                 content = content.substring (index + CHARSET_STRING.length ()).trim ();
                  if (content.startsWith ("="))
                  {
--- 264,269 ----
              if (index != -1)
              {
!                 content = content.substring (index +
!                     CHARSET_STRING.length ()).trim ();
                  if (content.startsWith ("="))
                  {
***************
*** 254,262 ****
  
                      //remove any double quotes from around charset string
!                     if (content.startsWith ("\"") && content.endsWith ("\"") && (1 < content.length ()))
                          content = content.substring (1, content.length () - 1);
  
                      //remove any single quote from around charset string
!                     if (content.startsWith ("'") && content.endsWith ("'") && (1 < content.length ()))
                          content = content.substring (1, content.length () - 1);
  
--- 274,284 ----
  
                      //remove any double quotes from around charset string
!                     if (content.startsWith ("\"") && content.endsWith ("\"")
!                         && (1 < content.length ()))
                          content = content.substring (1, content.length () - 1);
  
                      //remove any single quote from around charset string
!                     if (content.startsWith ("'") && content.endsWith ("'")
!                         && (1 < content.length ()))
                          content = content.substring (1, content.length () - 1);
  
***************
*** 264,268 ****
  
                      // Charset names are not case-sensitive;
!                     // that is, case is always ignored when comparing charset names.
  //                    if (!ret.equalsIgnoreCase (content))
  //                    {
--- 286,291 ----
  
                      // Charset names are not case-sensitive;
!                     // that is, case is always ignored when comparing
!                     // charset names.
  //                    if (!ret.equalsIgnoreCase (content))
  //                    {
***************
*** 287,294 ****
       * in that case the default is always returned.
       * @param name The name to look up. One of the aliases for a character set.
!      * @param _default The name to return if the lookup fails.
       * @return The character set name.
       */
!     public static String findCharset (String name, String _default)
      {
          String ret;
--- 310,317 ----
       * in that case the default is always returned.
       * @param name The name to look up. One of the aliases for a character set.
!      * @param fallback The name to return if the lookup fails.
       * @return The character set name.
       */
!     public static String findCharset (String name, String fallback)
      {
          String ret;
***************
*** 327,336 ****
              // and java.nio.charset.UnsupportedCharsetException
              // return the default
!             ret = _default;
              System.out.println (
                  "unable to determine cannonical charset name for "
                  + name
                  + " - using "
!                 + _default);
          }
  
--- 350,359 ----
              // and java.nio.charset.UnsupportedCharsetException
              // return the default
!             ret = fallback;
              System.out.println (
                  "unable to determine cannonical charset name for "
                  + name
                  + " - using "
!                 + fallback);
          }
  
***************
*** 348,351 ****
--- 371,375 ----
       * disconnected, the underling source is saved.
       * @param out The object stream to store this object in.
+      * @exception IOException If there is a serialization problem.
       */
      private void writeObject (ObjectOutputStream out)
***************
*** 388,391 ****
--- 412,419 ----
       * For details see <code>writeObject()</code>.
       * @param in The object stream to decode.
+      * @exception IOException If there is a deserialization problem with
+      * the stream.
+      * @exception ClassNotFoundException If the deserialized class can't be
+      * located with the current classpath and class loader.
       */
      private void readObject (ObjectInputStream in)
***************
*** 461,471 ****
       * Clean up this page, releasing resources.
       * Calls <code>close()</code>.
!      * @exception Throwable if <code>close()</code> throws an <code>IOException</code>.
       */
!     protected void finalize () throws Throwable
      {
          close ();
      }
!     
      /**
       * Get the connection, if any.
--- 489,502 ----
       * Clean up this page, releasing resources.
       * Calls <code>close()</code>.
!      * @exception Throwable if <code>close()</code> throws an
!      * <code>IOException</code>.
       */
!     protected void finalize ()
!         throws
!             Throwable
      {
          close ();
      }
! 
      /**
       * Get the connection, if any.
***************
*** 504,512 ****
          catch (UnknownHostException uhe)
          {
!             throw new ParserException ("Connect to " + mConnection.getURL ().toExternalForm () + " failed.", uhe);
          }
          catch (IOException ioe)
          {
!             throw new ParserException ("Exception connecting to " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe);
          }
          type = getContentType ();
--- 535,546 ----
          catch (UnknownHostException uhe)
          {
!             throw new ParserException ("Connect to "
!                 + mConnection.getURL ().toExternalForm () + " failed.", uhe);
          }
          catch (IOException ioe)
          {
!             throw new ParserException ("Exception connecting to "
!                 + mConnection.getURL ().toExternalForm ()
!                 + " (" + ioe.getMessage () + ").", ioe);
          }
          type = getContentType ();
***************
*** 515,525 ****
          {
              contentEncoding = connection.getContentEncoding();
!             if ((null != contentEncoding) && (-1 != contentEncoding.indexOf ("gzip")))
              {
!                 stream = new Stream (new GZIPInputStream (getConnection ().getInputStream ()));
              }
!             else if ((null != contentEncoding) && (-1 != contentEncoding.indexOf ("deflate")))
              {
!                 stream = new Stream (new InflaterInputStream (getConnection ().getInputStream ()));
              }
              else
--- 549,563 ----
          {
              contentEncoding = connection.getContentEncoding();
!             if ((null != contentEncoding)
!                 && (-1 != contentEncoding.indexOf ("gzip")))
              {
!                 stream = new Stream (new GZIPInputStream (
!                     getConnection ().getInputStream ()));
              }
!             else if ((null != contentEncoding)
!                 && (-1 != contentEncoding.indexOf ("deflate")))
              {
!                 stream = new Stream (new InflaterInputStream (
!                     getConnection ().getInputStream ()));
              }
              else
***************
*** 549,553 ****
          catch (IOException ioe)
          {
!             throw new ParserException ("Exception getting input stream from " + mConnection.getURL ().toExternalForm () + " (" + ioe.getMessage () + ").", ioe);
          }
          mUrl = connection.getURL ().toExternalForm ();
--- 587,593 ----
          catch (IOException ioe)
          {
!             throw new ParserException ("Exception getting input stream from "
!                 + mConnection.getURL ().toExternalForm ()
!                 + " (" + ioe.getMessage () + ").", ioe);
          }
          mUrl = connection.getURL ().toExternalForm ();
***************
*** 596,600 ****
          mBaseUrl = url;
      }
!     
      /**
       * Get the source this page is reading from.
--- 636,640 ----
          mBaseUrl = url;
      }
! 
      /**
       * Get the source this page is reading from.
***************
*** 629,634 ****
  
      /**
!      * Read the character at the cursor position.
!      * The cursor position can be behind or equal to the current source position.
       * Returns end of lines (EOL) as \n, by converting \r and \r\n to \n,
       * and updates the end-of-line index accordingly
--- 669,675 ----
  
      /**
!      * Read the character at the given cursor position.
!      * The cursor position can be only behind or equal to the
!      * current source position.
       * Returns end of lines (EOL) as \n, by converting \r and \r\n to \n,
       * and updates the end-of-line index accordingly
***************
*** 651,655 ****
          if (mSource.offset () < i)
              // hmmm, we could skip ahead, but then what about the EOL index
!             throw new ParserException ("attempt to read future characters from source");
          else if (mSource.offset () == i)
              try
--- 692,698 ----
          if (mSource.offset () < i)
              // hmmm, we could skip ahead, but then what about the EOL index
!             throw new ParserException (
!                 "attempt to read future characters from source "
!                 + i + " > " + mSource.offset ());
          else if (mSource.offset () == i)
              try
***************
*** 761,765 ****
       * read characters up to the current position.
       * If a difference is encountered, or some other problem occurs,
!      * an exception is thrown. 
       * @param character_set The character set to use to convert bytes into
       * characters.
--- 804,808 ----
       * read characters up to the current position.
       * If a difference is encountered, or some other problem occurs,
!      * an exception is thrown.
       * @param character_set The character set to use to convert bytes into
       * characters.
***************
*** 921,925 ****
      {
          String ret;
!         
          try
          {
--- 964,968 ----
      {
          String ret;
! 
          try
          {
***************
*** 936,940 ****
                  + ioe.getMessage ());
          }
!         
          return (ret);
      }
--- 979,983 ----
                  + ioe.getMessage ());
          }
! 
          return (ret);
      }
***************
*** 957,961 ****
  
          if ((mSource.offset () < start) || (mSource.offset () < end))
!             throw new IllegalArgumentException ("attempt to extract future characters from source");
          if (end < start)
          {
--- 1000,1006 ----
  
          if ((mSource.offset () < start) || (mSource.offset () < end))
!             throw new IllegalArgumentException (
!                 "attempt to extract future characters from source"
!                 + start + "|" + end + " > " + mSource.offset ());
          if (end < start)
          {

Index: PageAttribute.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageAttribute.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** PageAttribute.java	12 Apr 2005 11:27:41 -0000	1.3
--- PageAttribute.java	15 May 2005 11:49:04 -0000	1.4
***************
*** 28,32 ****
  
  import org.htmlparser.Attribute;
- import org.htmlparser.lexer.Page;
  
  /**
--- 28,31 ----

Index: Cursor.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** Cursor.java	12 Apr 2005 11:27:41 -0000	1.19
--- Cursor.java	15 May 2005 11:49:04 -0000	1.20
***************
*** 167,170 ****
      }
  }
- 
-        
\ No newline at end of file
--- 167,168 ----

[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Cursor.java,1.19,1.20 InputStreamSource.java,1.

[Htmlparser-cvs] htmlparser/src/org/htmlparser/lexer Cursor.java,1.19,1.20 InputStreamSource.java,1.6,1.7 Lexer.java,1.38,1.39 Page.java,1.49,1.50 PageAttribute.java,1.3,1.4 PageIndex.java,1.17,1.18 Source.java,1.19,1.20 Stream.java,1.13,1.14 StringSource.java,1.3,1.4