[Htmlparser-cvs] htmlparser/src/org/htmlparser/nodes AbstractNode.java,1.3,1.4 RemarkNode.java,1.3,1.4 TagNode.java,1.5,1.6 TextNode.java,1.3,1.4 package.html,1.1,1.2

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30655/htmlparser/src/org/htmlparser/nodes

Modified Files:
	AbstractNode.java RemarkNode.java TagNode.java TextNode.java 
	package.html 
Log Message:
Documentation revamp part one.
Deprecated node decorators.
Added doSemanticAction for Text and Comment nodes.
Added missing sitecapturer scripts.
Fixed DOS batch files to work when called from any location.

Index: package.html
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/package.html,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** package.html	24 May 2004 16:18:37 -0000	1.1
--- package.html	10 Apr 2005 23:20:44 -0000	1.2
***************
*** 58,66 ****
  and child and parent references. Only the {@link org.htmlparser.nodes.TagNode TagNode} objects
  contain a list of {@link org.htmlparser.Attribute Attribute} objects.
! <p>
! The {@link org.htmlparser.lexer.Lexer Lexer} parses an HTML stream into a contiguous stream of these
! nodes. The {@link org.htmlparser.Parser Parser} returns specific {@link
! org.htmlparser.tags Tag} objects, which are subclasses of the {@link org.htmlparser.nodes.TagNode TagNode}
! class.
  <p>
  </BODY>
--- 58,67 ----
  and child and parent references. Only the {@link org.htmlparser.nodes.TagNode TagNode} objects
  contain a list of {@link org.htmlparser.Attribute Attribute} objects.
! <p>The {@link org.htmlparser.lexer.Lexer Lexer} parses an HTML stream into a
! contiguous stream of these nodes.</p>
! <p>The {@link org.htmlparser.Parser Parser} returns either these nodes or specific
! {@link org.htmlparser.tags Tag} objects (which are subclasses of TagNode)
! for tags with names that have been registered via
! {@link org.htmlparser.PrototypicalNodeFactory#registerTag registerTag()}.
  <p>
  </BODY>

Index: TextNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/TextNode.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** TextNode.java	17 Jul 2004 13:45:04 -0000	1.3
--- TextNode.java	10 Apr 2005 23:20:44 -0000	1.4
***************
*** 71,75 ****

      /**
!      * Returns the text of the string line.
       */
      public String getText ()
--- 71,77 ----

      /**
!      * Returns the text of the node.
!      * This is the same as {@link #toHtml} for this type of node.
!      * @return The contents of this text node.
       */
      public String getText ()
***************
*** 89,92 ****
--- 91,99 ----
      }

+     /**
+      * Returns the text of the node.
+      * This is the same as {@link #toHtml} for this type of node.
+      * @return The contents of this text node.
+      */
      public String toPlainTextString ()
      {
***************
*** 94,97 ****
--- 101,108 ----
      }

+     /**
+      * Returns the text of the node.
+      * @return The contents of this text node.
+      */
      public String toHtml ()
      {

Index: AbstractNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/AbstractNode.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** AbstractNode.java	17 Jul 2004 13:45:04 -0000	1.3
--- AbstractNode.java	10 Apr 2005 23:20:44 -0000	1.4
***************
*** 37,41 ****

  /**
!  * AbstractNode, which implements the Node interface, is the base class for all types of nodes, including tags, string elements, etc
   */
  public abstract class AbstractNode implements Node, Serializable
--- 37,44 ----

  /**
!  * The concrete base class for all types of nodes (tags, text, remarks).
!  * This class provides basic functionality to hold the {@link Page}, the
!  * starting and ending position in the page, the parent and the list of
!  * {@link NodeList children}.
   */
  public abstract class AbstractNode implements Node, Serializable
***************
*** 95,130 ****

      /**
!      * Returns a string representation of the node. This is an important method, it allows a simple string transformation
!      * of a web page, regardless of a node.<br>
!      * Typical application code (for extracting only the text from a web page) would then be simplified to  :<br>
       * <pre>
       * Node node;
!      * for (Enumeration e = parser.elements();e.hasMoreElements();) {
!      *    node = (Node)e.nextElement();
!      *    System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string
       * }
       * </pre>
       */
!     public abstract String toPlainTextString();

      /**
!      * This method will make it easier when using html parser to reproduce html pages (with or without modifications)
!      * Applications reproducing html can use this method on nodes which are to be used or transferred as they were
!      * recieved, with the original html
       */
!     public abstract String toHtml();

      /**
!      * Return the string representation of the node.
       * Subclasses must define this method, and this is typically to be used in the manner<br>
       * <pre>System.out.println(node)</pre>
!      * @return java.lang.String
       */
!     public abstract String toString();

      /**
       * Collect this node and its child nodes (if-applicable) into the collectionList parameter, provided the node
       * satisfies the filtering criteria.<P>
!      *
       * This mechanism allows powerful filtering code to be written very easily,
       * without bothering about collection of embedded tags separately.
--- 98,141 ----

      /**
!      * Returns a string representation of the node.
!      * It allows a simple string transformation
!      * of a web page, regardless of node type.<br>
!      * Typical application code (for extracting only the text from a web page)
!      * would then be simplified to:<br>
       * <pre>
       * Node node;
!      * for (Enumeration e = parser.elements (); e.hasMoreElements (); )
!      * {
!      *     node = (Node)e.nextElement();
!      *     System.out.println (node.toPlainTextString ());
!      *     // or do whatever processing you wish with the plain text string
       * }
       * </pre>
+      * @return The 'browser' content of this node.
       */
!     public abstract String toPlainTextString ();

      /**
!      * Return the HTML that generated this node.
!      * This method will make it easier when using html parser to reproduce html
!      * pages (with or without modifications).
!      * Applications reproducing html can use this method on nodes which are to
!      * be used or transferred as they were received, with the original html.
!      * @return The HTML code for this node.
       */
!     public abstract String toHtml ();

      /**
!      * Return a string representation of the node.
       * Subclasses must define this method, and this is typically to be used in the manner<br>
       * <pre>System.out.println(node)</pre>
!      * @return A textual representation of the node suitable for debugging
       */
!     public abstract String toString ();

      /**
       * Collect this node and its child nodes (if-applicable) into the collectionList parameter, provided the node
       * satisfies the filtering criteria.<P>
!      * 
       * This mechanism allows powerful filtering code to be written very easily,
       * without bothering about collection of embedded tags separately.
***************
*** 134,138 ****
       * current node is a {@link org.htmlparser.tags.CompositeTag}, and going through its children.
       * So this method provides a convenient way to do this.<P>
!      *
       * Using collectInto(), programs get a lot shorter. Now, the code to
       * extract all links from a page would look like:
--- 145,149 ----
       * current node is a {@link org.htmlparser.tags.CompositeTag}, and going through its children.
       * So this method provides a convenient way to do this.<P>
!      * 
       * Using collectInto(), programs get a lot shorter. Now, the code to
       * extract all links from a page would look like:
***************
*** 145,149 ****
       * Thus, collectionList will hold all the link nodes, irrespective of how
       * deep the links are embedded.<P>
!      *
       * Another way to accomplish the same objective is:
       * <pre>
--- 156,160 ----
       * Thus, collectionList will hold all the link nodes, irrespective of how
       * deep the links are embedded.<P>
!      * 
       * Another way to accomplish the same objective is:
       * <pre>
***************
*** 155,158 ****
--- 166,171 ----
       * This is slightly less specific because the LinkTag class may be
       * registered for more than one node name, e.g. &lt;LINK&gt; tags too.
+      * @param list The node list to collect acceptable nodes into.
+      * @param filter The filter to determine which nodes are retained.
       */
      public void collectInto (NodeList list, NodeFilter filter)
***************
*** 163,184 ****

      /**
-      * Returns the beginning position of the tag.
-      * @deprecated Use {@link #getStartPosition}.
-      */
-     public int elementBegin()
-     {
-         return (getStartPosition ());
-     }
- 
-     /**
-      * Returns the ending position fo the tag
-      * @deprecated Use {@link #getEndPosition}.
-      */
-     public int elementEnd()
-     {
-         return (getEndPosition ());
-     }
- 
-     /**
       * Get the page this node came from.
       * @return The page that supplied this node.
--- 176,179 ----
***************
*** 234,245 ****
      }

-     public abstract void accept (NodeVisitor visitor);
- 
      /**
!      * @deprecated - use toHtml() instead
       */
!     public final String toHTML() {
!         return toHtml();
!     }

      /**
--- 229,237 ----
      }

      /**
!      * Visit this node.
!      * @param visitor The visitor that is visiting this node.
       */
!     public abstract void accept (NodeVisitor visitor);

      /**
***************
*** 283,289 ****

      /**
!      * Returns the text of the string line
       */
!     public String getText() {
          return null;
      }
--- 275,283 ----

      /**
!      * Returns the text of the node.
!      * @return The text of this node. The default is <code>null</code>.
       */
!     public String getText ()
!     {
          return null;
      }
***************
*** 293,298 ****
       * @param text The new text for the node.
       */
!     public void setText(String text) {
! 
      }

--- 287,292 ----
       * @param text The new text for the node.
       */
!     public void setText(String text)
!     {
      }

***************
*** 300,305 ****
       * Perform the meaning of this tag.
       * The default action is to do nothing.
       */
!     public void doSemanticAction () throws ParserException
      {
      }
--- 294,303 ----
       * Perform the meaning of this tag.
       * The default action is to do nothing.
+      * @exception ParserException <em>Not used.</em> Provides for subclasses
+      * that may want to indicate an exceptional condition.
       */
!     public void doSemanticAction ()
!         throws
!             ParserException
      {
      }

Index: TagNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/TagNode.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** TagNode.java	31 Jul 2004 16:42:35 -0000	1.5
--- TagNode.java	10 Apr 2005 23:20:44 -0000	1.6
***************
*** 319,328 ****
      }

!     /*
!      * Sets the attributes.
!      * @param attribs The attribute collection to set.
!      * Each element is an {@link Attribute Attribute}.
!      * The first attribute in the list must be the tag name (
!      * <code>isStandalone()</code> returns <code>true</code>).
       */
      public void setAttributeEx (Attribute attribute)
--- 319,326 ----
      }

!     /**
!      * Set an attribute.
!      * @param attribute The attribute to set.
!      * @see #setAttribute(Attribute)
       */
      public void setAttributeEx (Attribute attribute)
***************
*** 374,387 ****

      /**
-      * Eqivalent to <code>getAttribute (name)</code>.
-      * @param name Name of attribute.
-      * @deprecated use getAttribute instead
-      */
-     public String getParameter (String name)
-     {
-         return (getAttribute (name));
-     }
- 
-     /**
       * Gets the attributes in the tag.
       * @return Returns the list of {@link Attribute Attributes} in the tag.
--- 372,375 ----
***************
*** 533,537 ****
          String ret;

-         //ret = mPage.getText (elementBegin () + 1, elementEnd () - 1);
          ret = toHtml ();
          ret = ret.substring (1, ret.length () - 1);
--- 521,524 ----
***************
*** 766,769 ****
--- 753,757 ----
       * Based on <code>isEndTag()</code>, calls either <code>visitTag()</code> or
       * <code>visitEndTag()</code>.
+      * @param visitor The visitor that is visiting this node.
       */
      public void accept (NodeVisitor visitor)

Index: RemarkNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodes/RemarkNode.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** RemarkNode.java	17 Jul 2004 13:45:04 -0000	1.3
--- RemarkNode.java	10 Apr 2005 23:20:44 -0000	1.4
***************
*** 109,118 ****
      }

!     public String toPlainTextString()
      {
          return (getText());
      }
!     
!     public String toHtml()
      {
          StringBuffer buffer;
--- 109,126 ----
      }

!     /**
!      * Return the remark text.
!      * @return The HTML comment.
!      */
!     public String toPlainTextString ()
      {
          return (getText());
      }
! 
!     /**
!      * Return the full HTML remark.
!      * @return The comment, i.e. {@.html <!-- this is a comment -->}.
!      */
!     public String toHtml ()
      {
          StringBuffer buffer;

[Htmlparser-cvs] htmlparser/src/org/htmlparser/nodes AbstractNode.java,1.3,1.4 RemarkNode.java,1.3,1

[Htmlparser-cvs] htmlparser/src/org/htmlparser/nodes AbstractNode.java,1.3,1.4 RemarkNode.java,1.3,1.4 TagNode.java,1.5,1.6 TextNode.java,1.3,1.4 package.html,1.1,1.2