Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser Node.java,1.52,1.53 Parser.java,1.104,1.105 Prototypi

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5117/htmlparser/src/org/htmlparser

Modified Files:
	Node.java Parser.java PrototypicalNodeFactory.java 
Log Message:
Documentation revamp part three.
Reworked some JavaDoc descriptions.
Added "HTML Parser for dummies" introductory text.
Removed checkstyle.jar and fit.jar (and its cruft).

Index: Parser.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v
retrieving revision 1.104
retrieving revision 1.105
diff -C2 -d -r1.104 -r1.105
*** Parser.java	5 Apr 2005 00:48:10 -0000	1.104
--- Parser.java	24 Apr 2005 17:48:27 -0000	1.105
***************
*** 117,121 ****
       * The floating point version number ({@value}).
       */
!     public final static double
      VERSION_NUMBER = 1.5
      ;
--- 117,121 ----
       * The floating point version number ({@value}).
       */
!     public static final double
      VERSION_NUMBER = 1.5
      ;
***************
*** 124,128 ****
       * The type of version ({@value}).
       */
!     public final static String
      VERSION_TYPE = "Integration Build"
      ;
--- 124,128 ----
       * The type of version ({@value}).
       */
!     public static final String
      VERSION_TYPE = "Integration Build"
      ;
***************
*** 131,146 ****
       * The date of the version ({@value}).
       */
!     public final static String
      VERSION_DATE = "Mar 13, 2005"
      ;

      /**
       * The display version ({@value}).
       */
!     public final static String
!     VERSION_STRING = "" + VERSION_NUMBER + " (" + VERSION_TYPE + " " + VERSION_DATE + ")"
!     ;
! 
!     // End of formatting

      /**
--- 131,146 ----
       * The date of the version ({@value}).
       */
!     public static final String
      VERSION_DATE = "Mar 13, 2005"
      ;

+     // End of formatting
+ 
      /**
       * The display version ({@value}).
       */
!     public static final String VERSION_STRING =
!             "" + VERSION_NUMBER
!             + " (" + VERSION_TYPE + " " + VERSION_DATE + ")";

      /**
***************
*** 158,162 ****
       * Use this for no feedback.
       */
!     public static ParserFeedback noFeedback = new DefaultParserFeedback (DefaultParserFeedback.QUIET);

      /**
--- 158,163 ----
       * Use this for no feedback.
       */
!     public static final ParserFeedback DEVNULL =
!         new DefaultParserFeedback (DefaultParserFeedback.QUIET);

      /**
***************
*** 164,168 ****
       * Use this for output on <code>System.out</code>.
       */
!     public static ParserFeedback stdout = new DefaultParserFeedback ();

      //
--- 165,169 ----
       * Use this for output on <code>System.out</code>.
       */
!     public static final ParserFeedback STDOUT = new DefaultParserFeedback ();

      //
***************
*** 243,247 ****
      public Parser ()
      {
!         this (new Lexer (new Page ("")), noFeedback);
      }

--- 244,248 ----
      public Parser ()
      {
!         this (new Lexer (new Page ("")), DEVNULL);
      }

***************
*** 272,276 ****
       * method will be called so it need not be connected yet.
       * @param fb The object to use for message communication.
!      * @throws ParserException If the creation of the underlying Lexer cannot be performed.
       */
      public Parser (URLConnection connection, ParserFeedback fb)
--- 273,278 ----
       * method will be called so it need not be connected yet.
       * @param fb The object to use for message communication.
!      * @throws ParserException If the creation of the underlying Lexer
!      * cannot be performed.
       */
      public Parser (URLConnection connection, ParserFeedback fb)
***************
*** 283,287 ****
      /**
       * Creates a Parser object with the location of the resource (URL or file)
!      * You would typically create a DefaultHTMLParserFeedback object and pass it in.
       * @see #Parser(URLConnection,ParserFeedback)
       * @param resourceLocn Either the URL or the filename (autodetects).
--- 285,290 ----
      /**
       * Creates a Parser object with the location of the resource (URL or file)
!      * You would typically create a DefaultHTMLParserFeedback object and pass
!      * it in.
       * @see #Parser(URLConnection,ParserFeedback)
       * @param resourceLocn Either the URL or the filename (autodetects).
***************
*** 292,296 ****
       * @throws ParserException If the URL is invalid.
       */
!     public Parser (String resourceLocn, ParserFeedback feedback) throws ParserException
      {
          this (getConnectionManager ().openConnection (resourceLocn), feedback);
--- 295,301 ----
       * @throws ParserException If the URL is invalid.
       */
!     public Parser (String resourceLocn, ParserFeedback feedback)
!         throws
!             ParserException
      {
          this (getConnectionManager ().openConnection (resourceLocn), feedback);
***************
*** 301,314 ****
       * A DefaultHTMLParserFeedback object is used for feedback.
       * @param resourceLocn Either the URL or the filename (autodetects).
!      * @throws ParserException If the resourceLocn argument does not resolve to a valid page or file.
       */
      public Parser (String resourceLocn) throws ParserException
      {
!         this (resourceLocn, stdout);
      }

      /**
       * Construct a parser using the provided lexer.
!      * A feedback object printing to {@link #stdout System.out} is used.
       * This would be used to create a parser for special cases where the
       * normal creation of a lexer on a URLConnection needs to be customized.
--- 306,320 ----
       * A DefaultHTMLParserFeedback object is used for feedback.
       * @param resourceLocn Either the URL or the filename (autodetects).
!      * @throws ParserException If the resourceLocn argument does not resolve
!      * to a valid page or file.
       */
      public Parser (String resourceLocn) throws ParserException
      {
!         this (resourceLocn, STDOUT);
      }

      /**
       * Construct a parser using the provided lexer.
!      * A feedback object printing to {@link #STDOUT System.out} is used.
       * This would be used to create a parser for special cases where the
       * normal creation of a lexer on a URLConnection needs to be customized.
***************
*** 317,321 ****
      public Parser (Lexer lexer)
      {
!         this (lexer, stdout);
      }

--- 323,327 ----
      public Parser (Lexer lexer)
      {
!         this (lexer, STDOUT);
      }

***************
*** 325,337 ****
       * a special setup or negotiation conditioning beyond what is available
       * from the {@link #getConnectionManager ConnectionManager}.
!      * A feedback object printing to {@link #stdout System.out} is used.
       * @see #Parser(URLConnection,ParserFeedback)
       * @param connection A fully conditioned connection. The connect()
       * method will be called so it need not be connected yet.
!      * @throws ParserException If the creation of the underlying Lexer cannot be performed.
       */
      public Parser (URLConnection connection) throws ParserException
      {
!         this (connection, stdout);
      }

--- 331,344 ----
       * a special setup or negotiation conditioning beyond what is available
       * from the {@link #getConnectionManager ConnectionManager}.
!      * A feedback object printing to {@link #STDOUT System.out} is used.
       * @see #Parser(URLConnection,ParserFeedback)
       * @param connection A fully conditioned connection. The connect()
       * method will be called so it need not be connected yet.
!      * @throws ParserException If the creation of the underlying Lexer
!      * cannot be performed.
       */
      public Parser (URLConnection connection) throws ParserException
      {
!         this (connection, STDOUT);
      }

***************
*** 412,416 ****
          getLexer ().getPage ().setEncoding (encoding);
      }
!         
      /**
       * Get the encoding for the page this parser is reading from.
--- 419,423 ----
          getLexer ().getPage ().setEncoding (encoding);
      }
! 
      /**
       * Get the encoding for the page this parser is reading from.
***************
*** 488,496 ****
       * Sets the feedback object used in scanning.
       * @param fb The new feedback object to use. If this is null a
!      * {@link #noFeedback silent feedback object} is used.
       */
      public void setFeedback (ParserFeedback fb)
      {
!         mFeedback = (null == fb) ? noFeedback : fb;
      }

--- 495,506 ----
       * Sets the feedback object used in scanning.
       * @param fb The new feedback object to use. If this is null a
!      * {@link #DEVNULL silent feedback object} is used.
       */
      public void setFeedback (ParserFeedback fb)
      {
!         if (null == fb)
!             mFeedback = DEVNULL;
!         else
!             mFeedback = fb;
      }

***************
*** 512,515 ****
--- 522,534 ----
       * This assumes support for a reset from the underlying
       * {@link org.htmlparser.lexer.Source} object.
+      * <p>This is cheaper (in terms of time) than resetting the URL, i.e.
+      * <pre>
+      * parser.setURL (parser.getURL ());
+      * </pre>
+      * because the page is not refetched from the internet.
+      * <em>Note: the nodes returned on the second parse are new
+      * nodes and not the same nodes returned on the first parse. If you
+      * want the same nodes for re-use, collect them in a NodeList with
+      * {@link #parse(NodeFilter) parse(null)} and operate on the NodeList.</em>
       */
      public void reset ()
***************
*** 552,562 ****
       *         // ...
       *         // process recursively (nodes within nodes) via getChildren()
!      *         NodeList list = tag.getChildren ();
!      *         if (null != list)
!      *             for (NodeIterator i = list.elements (); i.hasMoreElements (); )
       *                 processMyNodes (i.nextNode ());
       *     }
       * }
!      * 
       * Parser parser = new Parser ("http://www.yahoo.com");
       * for (NodeIterator i = parser.elements (); i.hasMoreElements (); )
--- 571,581 ----
       *         // ...
       *         // process recursively (nodes within nodes) via getChildren()
!      *         NodeList nl = tag.getChildren ();
!      *         if (null != nl)
!      *             for (NodeIterator i = nl.elements (); i.hasMoreElements (); )
       *                 processMyNodes (i.nextNode ());
       *     }
       * }
!      *
       * Parser parser = new Parser ("http://www.yahoo.com");
       * for (NodeIterator i = parser.elements (); i.hasMoreElements (); )
***************
*** 574,577 ****
--- 593,620 ----
      /**
       * Parse the given resource, using the filter provided.
+      * This can be used to extract information from specific nodes.
+      * When used with a <code>null</code> filter it returns an
+      * entire page which can then be modified and converted back to HTML
+      * (Note: the synthesis use-case is not handled very well; the parser
+      * is more often used to extract information from a web page).
+      * <p>For example, to replace the entire contents of the HEAD with a
+      * single TITLE tag you could do this:
+      * <pre>
+      * NodeList nl = parser.parse (null); // here is your two node list
+      * NodeList heads = nl.extractAllNodesThatMatch (new TagNameFilter ("HEAD"))
+      * if (heads.size () > 0) // there may not be a HEAD tag
+      * {
+      *     Head head = heads.elementAt (0); // there should be only one
+      *     head.removeAll (); // clean out the contents
+      *     Tag title = new TitleTag ();
+      *     title.setTagName ("title");
+      *     title.setChildren (new NodeList (new TextNode ("The New Title")));
+      *     Tag title_end = new TitleTag ();
+      *     title_end.setTagName ("/title");
+      *     title.setEndTag (title_end);
+      *     head.add (title);
+      * }
+      * System.out.println (nl.toHtml ()); // output the modified HTML
+      * </pre>
       * @return The list of matching nodes (for a <code>null</code>
       * filter this is all the top level nodes).
***************
*** 595,599 ****
                  ret.add (node);
          }
!         
          return (ret);
      }
--- 638,642 ----
                  ret.add (node);
          }
! 
          return (ret);
      }
***************
*** 606,615 ****
       * page and <code>finishedParsing()</code> is called after the processing.
       * @param visitor The visitor to visit all nodes with.
!      * @throws ParserException If a parse error occurs while traversing the page with the visitor.
       */
!     public void visitAllNodesWith (NodeVisitor visitor) throws ParserException {
          Node node;
          visitor.beginParsing();
!         for (NodeIterator e = elements();e.hasMoreNodes();) {
              node = e.nextNode();
              node.accept(visitor);
--- 649,661 ----
       * page and <code>finishedParsing()</code> is called after the processing.
       * @param visitor The visitor to visit all nodes with.
!      * @throws ParserException If a parse error occurs while traversing
!      * the page with the visitor.
       */
!     public void visitAllNodesWith (NodeVisitor visitor) throws ParserException
!     {
          Node node;
          visitor.beginParsing();
!         for (NodeIterator e = elements(); e.hasMoreNodes(); )
!         {
              node = e.nextNode();
              node.accept(visitor);
***************
*** 621,625 ****
       * Initializes the parser with the given input HTML String.
       * @param inputHTML the input HTML that is to be parsed.
!      * @throws ParserException If a error occurs in setting up the underlying Lexer.
       */
      public void setInputHTML (String inputHTML)
--- 667,672 ----
       * Initializes the parser with the given input HTML String.
       * @param inputHTML the input HTML that is to be parsed.
!      * @throws ParserException If a error occurs in setting up the
!      * underlying Lexer.
       */
      public void setInputHTML (String inputHTML)
***************
*** 642,650 ****
       * returned <code>true</code>.
       */
!     public NodeList extractAllNodesThatMatch (NodeFilter filter) throws ParserException
      {
          NodeIterator e;
          NodeList ret;
!         
          ret = new NodeList ();
          for (e = elements (); e.hasMoreNodes (); )
--- 689,699 ----
       * returned <code>true</code>.
       */
!     public NodeList extractAllNodesThatMatch (NodeFilter filter)
!         throws
!             ParserException
      {
          NodeIterator e;
          NodeList ret;
! 
          ret = new NodeList ();
          for (e = elements (); e.hasMoreNodes (); )
***************
*** 656,664 ****
      /**
       * Convenience method to extract all nodes of a given class type.
!      * Equivalent to <code>extractAllNodesThatMatch (new NodeClassFilter (nodeType))</code>.
       * @param nodeType The class of the nodes to collect.
       * @throws ParserException If a parse error occurs.
       * @return A list of nodes which have the class specified.
!      * @deprecated Use extractAllNodesThatMatch (new NodeClassFilter (nodeType)).
       * @see #extractAllNodesThatAre
       */
--- 705,714 ----
      /**
       * Convenience method to extract all nodes of a given class type.
!      * Equivalent to
!      * <code>extractAllNodesThatMatch (new NodeClassFilter (nodeType))</code>.
       * @param nodeType The class of the nodes to collect.
       * @throws ParserException If a parse error occurs.
       * @return A list of nodes which have the class specified.
!      * @deprecated Use extractAllNodesThatMatch (new NodeClassFilter (cls)).
       * @see #extractAllNodesThatAre
       */
***************
*** 669,673 ****
          NodeList ret;

!         ret = extractAllNodesThatMatch (new NodeClassFilter (nodeType)); 

          return (ret.toNodeArray ());
--- 719,723 ----
          NodeList ret;

!         ret = extractAllNodesThatMatch (new NodeClassFilter (nodeType));

          return (ret.toNodeArray ());
***************
*** 690,695 ****
              ParserException
      {
!         if (null != getFeedback ())
!             getFeedback ().info (ConnectionManager.getRequestHeader (connection));
      }

--- 740,744 ----
              ParserException
      {
!         getFeedback ().info (ConnectionManager.getRequestHeader (connection));
      }

***************
*** 706,711 ****
              ParserException
      {
!         if (null != getFeedback ())
!             getFeedback ().info (ConnectionManager.getResponseHeader (connection));
      }

--- 755,759 ----
              ParserException
      {
!         getFeedback ().info (ConnectionManager.getResponseHeader (connection));
      }

***************
*** 724,738 ****
              System.out.println ("HTML Parser v" + VERSION_STRING + "\n");
              System.out.println ();
!             System.out.println ("Syntax : java -jar htmlparser.jar <resourceLocn/website> [node_type]");
!             System.out.println ("   <resourceLocn/website> the URL or file to be parsed");
!             System.out.println ("   node_type an optional node name, for example:");
!             System.out.println ("     A - Show only the link tags extracted from the document");
!             System.out.println ("     IMG - Show only the image tags extracted from the document");
!             System.out.println ("     TITLE - Extract the title from the document");
!             System.out.println ();
!             System.out.println ("Example : java -jar htmlparser.jar http://www.yahoo.com");
              System.out.println ();
!             System.out.println ("For support, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page...");
!             System.out.println ("HTML Parser home page : http://htmlparser.org");
              System.out.println ();
          }
--- 772,785 ----
              System.out.println ("HTML Parser v" + VERSION_STRING + "\n");
              System.out.println ();
!             System.out.println ("Syntax : java -jar htmlparser.jar"
!                     + " <file/page> [type]");
!             System.out.println ("   <file/page> the URL or file to be parsed");
!             System.out.println ("   type the node type, for example:");
!             System.out.println ("     A - Show only the link tags");
!             System.out.println ("     IMG - Show only the image tags");
!             System.out.println ("     TITLE - Show only the title tag");
              System.out.println ();
!             System.out.println ("Example : java -jar htmlparser.jar"
!                     + " http://www.yahoo.com");
              System.out.println ();
          }
***************
*** 746,750 ****
                  {   // for a simple dump, use more verbose settings
                      filter = null;
!                     parser.setFeedback (Parser.stdout);
                      getConnectionManager ().setMonitor (parser);
                  }
--- 793,797 ----
                  {   // for a simple dump, use more verbose settings
                      filter = null;
!                     parser.setFeedback (Parser.STDOUT);
                      getConnectionManager ().setMonitor (parser);
                  }

Index: Node.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v
retrieving revision 1.52
retrieving revision 1.53
diff -C2 -d -r1.52 -r1.53
*** Node.java	10 Apr 2005 23:20:42 -0000	1.52
--- Node.java	24 Apr 2005 17:48:27 -0000	1.53
***************
*** 37,41 ****
   * to define your own nodes to be returned by the
   * {@link org.htmlparser.lexer.Lexer} or {@link Parser}, but each of the types
!  * must support this interface. 
   * More specific interface requirements for each of the node types are specified
   * by the {@link Text}, {@link Remark} and {@link Tag} interfaces.
--- 37,41 ----
   * to define your own nodes to be returned by the
   * {@link org.htmlparser.lexer.Lexer} or {@link Parser}, but each of the types
!  * must support this interface.
   * More specific interface requirements for each of the node types are specified
   * by the {@link Text}, {@link Remark} and {@link Tag} interfaces.
***************
*** 65,69 ****
       * @return The text of this node including it's children.
       */
!     public abstract String toPlainTextString ();

      /**
--- 65,69 ----
       * @return The text of this node including it's children.
       */
!     String toPlainTextString ();

      /**
***************
*** 71,75 ****
       * This should be the exact sequence of characters that were encountered by
       * the parser that caused this node to be created. Where this breaks down is
!      * where broken nodes (tags and remarks) have been encountered and fixed. 
       * Applications reproducing html can use this method on nodes which are to
       * be used or transferred as they were received or created.
--- 71,75 ----
       * This should be the exact sequence of characters that were encountered by
       * the parser that caused this node to be created. Where this breaks down is
!      * where broken nodes (tags and remarks) have been encountered and fixed.
       * Applications reproducing html can use this method on nodes which are to
       * be used or transferred as they were received or created.
***************
*** 77,86 ****
       * to be returned by the parser or lexer.
       */
!     public abstract String toHtml ();

      /**
       * Return the string representation of the node.
       * The return value may not be the entire contents of the node, and non-
!      * printable characters may be translated in order to make them visible. 
       * This is typically to be used in
       * the manner<br>
--- 77,86 ----
       * to be returned by the parser or lexer.
       */
!     String toHtml ();

      /**
       * Return the string representation of the node.
       * The return value may not be the entire contents of the node, and non-
!      * printable characters may be translated in order to make them visible.
       * This is typically to be used in
       * the manner<br>
***************
*** 92,110 ****
       * that isn't too large.
       */
!     public abstract String toString ();

      /**
!      * Collect this node and its child nodes (if applicable) into a list, provided the node
!      * satisfies the filtering criteria.<P>
!      *
!      * This mechanism allows powerful filtering code to be written very easily,
!      * without bothering about collection of embedded tags separately.
       * e.g. when we try to get all the links on a page, it is not possible to
       * get it at the top-level, as many tags (like form tags), can contain
       * links embedded in them. We could get the links out by checking if the
       * current node is a {@link org.htmlparser.tags.CompositeTag}, and going
!      * through its children. So this method provides a convenient way to do this.<P>
!      *
!      * Using collectInto(), programs get a lot shorter. Now, the code to
       * extract all links from a page would look like:
       * <pre>
--- 92,109 ----
       * that isn't too large.
       */
!     String toString ();

      /**
!      * Collect this node and its child nodes into a list, provided the node
!      * satisfies the filtering criteria.
!      * <p>This mechanism allows powerful filtering code to be written very
!      * easily, without bothering about collection of embedded tags separately.
       * e.g. when we try to get all the links on a page, it is not possible to
       * get it at the top-level, as many tags (like form tags), can contain
       * links embedded in them. We could get the links out by checking if the
       * current node is a {@link org.htmlparser.tags.CompositeTag}, and going
!      * through its children. So this method provides a convenient way to do
!      * this.</p>
!      * <p>Using collectInto(), programs get a lot shorter. Now, the code to
       * extract all links from a page would look like:
       * <pre>
***************
*** 115,121 ****
       * </pre>
       * Thus, <code>list</code> will hold all the link nodes, irrespective of how
!      * deep the links are embedded.<P>
!      *
!      * Another way to accomplish the same objective is:
       * <pre>
       * NodeList list = new NodeList ();
--- 114,119 ----
       * </pre>
       * Thus, <code>list</code> will hold all the link nodes, irrespective of how
!      * deep the links are embedded.</p>
!      * <p>Another way to accomplish the same objective is:
       * <pre>
       * NodeList list = new NodeList ();
***************
*** 128,134 ****
       * @param list The list to collect nodes into.
       * @param filter The criteria to use when deciding if a node should
!      * be added to the list.
       */
!     public abstract void collectInto (NodeList list, NodeFilter filter);

      /**
--- 126,132 ----
       * @param list The list to collect nodes into.
       * @param filter The criteria to use when deciding if a node should
!      * be added to the list.</p>
       */
!     void collectInto (NodeList list, NodeFilter filter);

      /**
***************
*** 137,141 ****
       * @return The start position.
       */
!     public abstract int getStartPosition ();

      /**
--- 135,139 ----
       * @return The start position.
       */
!     int getStartPosition ();

      /**
***************
*** 143,147 ****
       * @param position The new start position.
       */
!     public abstract void setStartPosition (int position);

      /**
--- 141,145 ----
       * @param position The new start position.
       */
!     void setStartPosition (int position);

      /**
***************
*** 151,155 ****
       * @return The end position.
       */
!     public abstract int getEndPosition ();

      /**
--- 149,153 ----
       * @return The end position.
       */
!     int getEndPosition ();

      /**
***************
*** 157,161 ****
       * @param position The new end position.
       */
!     public abstract void setEndPosition (int position);

      /**
--- 155,159 ----
       * @param position The new end position.
       */
!     void setEndPosition (int position);

      /**
***************
*** 163,167 ****
       * @return The page that supplied this node.
       */
!     public Page getPage ();

      /**
--- 161,165 ----
       * @return The page that supplied this node.
       */
!     Page getPage ();

      /**
***************
*** 169,178 ****
       * @param page The page that supplied this node.
       */
!     public void setPage (Page page);
      /**
       * Apply the visitor to this node.
       * @param visitor The visitor to this node.
       */
!     public abstract void accept (NodeVisitor visitor);

      /**
--- 167,177 ----
       * @param page The page that supplied this node.
       */
!     void setPage (Page page);
! 
      /**
       * Apply the visitor to this node.
       * @param visitor The visitor to this node.
       */
!     void accept (NodeVisitor visitor);

      /**
***************
*** 186,190 ****
       * otherwise.
       */
!     public abstract Node getParent ();

      /**
--- 185,189 ----
       * otherwise.
       */
!     Node getParent ();

      /**
***************
*** 192,196 ****
       * @param node The node that contains this node.
       */
!     public abstract void setParent (Node node);

      /**
--- 191,195 ----
       * @param node The node that contains this node.
       */
!     void setParent (Node node);

      /**
***************
*** 199,203 ****
       * <code>null</code> otherwise.
       */
!     public abstract NodeList getChildren ();

      /**
--- 198,202 ----
       * <code>null</code> otherwise.
       */
!     NodeList getChildren ();

      /**
***************
*** 205,209 ****
       * @param children The new list of children this node contains.
       */
!     public abstract void setChildren (NodeList children);

      /**
--- 204,208 ----
       * @param children The new list of children this node contains.
       */
!     void setChildren (NodeList children);

      /**
***************
*** 212,216 ****
       * a tag, the contents of the tag less the enclosing angle brackets.
       */
!     public String getText ();

      /**
--- 211,215 ----
       * a tag, the contents of the tag less the enclosing angle brackets.
       */
!     String getText ();

      /**
***************
*** 218,222 ****
       * @param text The new text for the node.
       */
!     public void setText (String text);

      /**
--- 217,221 ----
       * @param text The new text for the node.
       */
!     void setText (String text);

      /**
***************
*** 231,236 ****
       * composite nodes (those that contain other nodes), the children will have
       * already been parsed and will be available via {@link #getChildren}.
       */
!     public void doSemanticAction ()
          throws
              ParserException;
--- 230,237 ----
       * composite nodes (those that contain other nodes), the children will have
       * already been parsed and will be available via {@link #getChildren}.
+      * @exception ParserException If a problem is encountered performing the
+      * semantic action.
       */
!     void doSemanticAction ()
          throws
              ParserException;
***************
*** 242,247 ****
      /**
       * Allow cloning of nodes.
!      * Creates and returns a copy of this object.  The precise meaning 
!      * of "copy" may depend on the class of the object. The general 
       * intent is that, for any object <tt>x</tt>, the expression:
       * <blockquote>
--- 243,248 ----
      /**
       * Allow cloning of nodes.
!      * Creates and returns a copy of this object.  The precise meaning
!      * of "copy" may depend on the class of the object. The general
       * intent is that, for any object <tt>x</tt>, the expression:
       * <blockquote>
***************
*** 252,261 ****
       * <pre>
       * x.clone().getClass() == x.getClass()</pre></blockquote>
!      * will be <tt>true</tt>, but these are not absolute requirements. 
       * While it is typically the case that:
       * <blockquote>
       * <pre>
       * x.clone().equals(x)</pre></blockquote>
!      * will be <tt>true</tt>, this is not an absolute requirement. 
       * <p>
       * By convention, the returned object should be obtained by calling
--- 253,262 ----
       * <pre>
       * x.clone().getClass() == x.getClass()</pre></blockquote>
!      * will be <tt>true</tt>, but these are not absolute requirements.
       * While it is typically the case that:
       * <blockquote>
       * <pre>
       * x.clone().equals(x)</pre></blockquote>
!      * will be <tt>true</tt>, this is not an absolute requirement.
       * <p>
       * By convention, the returned object should be obtained by calling
***************
*** 275,291 ****
       * need to be modified.
       * <p>
!      * The method <tt>clone</tt> for class <tt>Object</tt> performs a 
!      * specific cloning operation. First, if the class of this object does 
!      * not implement the interface <tt>Cloneable</tt>, then a 
!      * <tt>CloneNotSupportedException</tt> is thrown. Note that all arrays 
!      * are considered to implement the interface <tt>Cloneable</tt>. 
!      * Otherwise, this method creates a new instance of the class of this 
!      * object and initializes all its fields with exactly the contents of 
       * the corresponding fields of this object, as if by assignment; the
!      * contents of the fields are not themselves cloned. Thus, this method 
       * performs a "shallow copy" of this object, not a "deep copy" operation.
       * <p>
!      * The class <tt>Object</tt> does not itself implement the interface 
!      * <tt>Cloneable</tt>, so calling the <tt>clone</tt> method on an object 
       * whose class is <tt>Object</tt> will result in throwing an
       * exception at run time.
--- 276,292 ----
       * need to be modified.
       * <p>
!      * The method <tt>clone</tt> for class <tt>Object</tt> performs a
!      * specific cloning operation. First, if the class of this object does
!      * not implement the interface <tt>Cloneable</tt>, then a
!      * <tt>CloneNotSupportedException</tt> is thrown. Note that all arrays
!      * are considered to implement the interface <tt>Cloneable</tt>.
!      * Otherwise, this method creates a new instance of the class of this
!      * object and initializes all its fields with exactly the contents of
       * the corresponding fields of this object, as if by assignment; the
!      * contents of the fields are not themselves cloned. Thus, this method
       * performs a "shallow copy" of this object, not a "deep copy" operation.
       * <p>
!      * The class <tt>Object</tt> does not itself implement the interface
!      * <tt>Cloneable</tt>, so calling the <tt>clone</tt> method on an object
       * whose class is <tt>Object</tt> will result in throwing an
       * exception at run time.
***************
*** 299,303 ****
       * @see java.lang.Cloneable
       */
!     public Object clone ()
          throws
              CloneNotSupportedException;
--- 300,304 ----
       * @see java.lang.Cloneable
       */
!     Object clone ()
          throws
              CloneNotSupportedException;

Index: PrototypicalNodeFactory.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/PrototypicalNodeFactory.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** PrototypicalNodeFactory.java	10 Apr 2005 23:20:42 -0000	1.14
--- PrototypicalNodeFactory.java	24 Apr 2005 17:48:27 -0000	1.15
***************
*** 34,42 ****
  import java.util.Vector;

- import org.htmlparser.Attribute;
- import org.htmlparser.NodeFactory;
- import org.htmlparser.Remark;
- import org.htmlparser.Tag;
- import org.htmlparser.Text;
  import org.htmlparser.lexer.Page;
  import org.htmlparser.nodes.TextNode;
--- 34,37 ----
***************
*** 73,77 ****
  import org.htmlparser.tags.TextareaTag;
  import org.htmlparser.tags.TitleTag;
- import org.htmlparser.util.ParserException;

  /**
--- 68,71 ----
***************
*** 98,102 ****
   * explicitly.</p>
   * <p>Here is an example of how to override all text issued from
!  * {@link org.htmlparser.nodes.TextNode#toPlainTextString() Text.toPlainTextString()},
   * in this case decoding (converting character references),
   * which illustrates the use of setting the text prototype:
--- 92,97 ----
   * explicitly.</p>
   * <p>Here is an example of how to override all text issued from
!  * {@link org.htmlparser.nodes.TextNode#toPlainTextString()
!  * Text.toPlainTextString()},
   * in this case decoding (converting character references),
   * which illustrates the use of setting the text prototype:
***************
*** 108,112 ****
   *         public String toPlainTextString()
   *         {
!  *             return (org.htmlparser.util.Translate.decode (super.toPlainTextString ()));
   *         }
   *     });
--- 103,108 ----
   *         public String toPlainTextString()
   *         {
!  *             String original = super.toPlainTextString ();
!  *             return (org.htmlparser.util.Translate.decode (original));
   *         }
   *     });
***************
*** 208,211 ****
--- 204,209 ----
       * Adds a tag to the registry.
       * @param id The name under which to register the tag.
+      * <strong>For proper operation, the id should be uppercase so it
+      * will be matched by a Map lookup.</strong>
       * @param tag The tag to be returned from a {@link #createTagNode} call.
       * @return The tag previously registered with that id if any,
***************
*** 259,272 ****
       * Register a tag.
       * Registers the given tag under every {@link Tag#getIds() id} that the
!      * tag has.
       * @param tag The tag to register.
       */
      public void registerTag (Tag tag)
      {
!         String ids[];
!         
          ids = tag.getIds ();
          for (int i = 0; i < ids.length; i++)
!             put (ids[i], tag);
      }

--- 257,272 ----
       * Register a tag.
       * Registers the given tag under every {@link Tag#getIds() id} that the
!      * tag has (i.e. all names returned by {@link Tag#getIds() tag.getIds()}).
!      * <p><strong>For proper operation, the ids are converted to uppercase so
!      * they will be matched by a Map lookup.</strong>
       * @param tag The tag to register.
       */
      public void registerTag (Tag tag)
      {
!         String[] ids;
! 
          ids = tag.getIds ();
          for (int i = 0; i < ids.length; i++)
!             put (ids[i].toUpperCase (Locale.ENGLISH), tag);
      }

***************
*** 274,286 ****
       * Unregister a tag.
       * Unregisters the given tag from every {@link Tag#getIds() id} the tag has.
       * @param tag The tag to unregister.
       */
      public void unregisterTag (Tag tag)
      {
!         String ids[];
!         
          ids = tag.getIds ();
          for (int i = 0; i < ids.length; i++)
!             remove (ids[i]);
      }

--- 274,288 ----
       * Unregister a tag.
       * Unregisters the given tag from every {@link Tag#getIds() id} the tag has.
+      * <p><strong>The ids are converted to uppercase to undo the operation
+      * of registerTag.</strong>
       * @param tag The tag to unregister.
       */
      public void unregisterTag (Tag tag)
      {
!         String[] ids;
! 
          ids = tag.getIds ();
          for (int i = 0; i < ids.length; i++)
!             remove (ids[i].toUpperCase (Locale.ENGLISH));
      }

***************
*** 323,327 ****
          registerTag (new HeadTag ());
          registerTag (new Html ());
!         
          return (this);
      }
--- 325,329 ----
          registerTag (new HeadTag ());
          registerTag (new Html ());
! 
          return (this);
      }
***************
*** 440,444 ****
      {
          Remark ret;
!         
          try
          {
--- 442,446 ----
      {
          Remark ret;
! 
          try
          {
***************
*** 503,507 ****
                  catch (CloneNotSupportedException cnse)
                  {
!                     // default to creating a new one
                  }
              }
--- 505,509 ----
                  catch (CloneNotSupportedException cnse)
                  {
!                     // default to creating a generic one
                  }
              }

Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser Node.java,1.52,1.53 Parser.java,1.104,1.105 Prototypi

htmlparser-cvs