[Htmlparser-cvs] htmlparser/src/org/htmlparser AbstractNode.java,1.10,1.11 Node.java,1.36,1.37 NodeReader.java,1.40,1.41 ...

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser
In directory sc8-pr-cvs1:/tmp/cvs-serv31228

Modified Files:
	AbstractNode.java Node.java NodeReader.java RemarkNode.java 
	StringNode.java StringNodeFactory.java 
Log Message:
Change tabs to spaces in all source files.

Index: AbstractNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** AbstractNode.java	24 Aug 2003 21:59:41 -0000	1.10
--- AbstractNode.java	3 Sep 2003 23:36:18 -0000	1.11
***************
*** 37,54 ****
   */
  public abstract class AbstractNode implements Node, Serializable {
! 	/** 
! 	 * The beginning position of the tag in the line
! 	 */
! 	protected int nodeBegin;

! 	/**
! 	 * The ending position of the tag in the line
! 	 */
! 	protected int nodeEnd;

! 	/**
! 	 * The parent of this node.
! 	 */
! 	protected Node parent;

      /**
--- 37,54 ----
   */
  public abstract class AbstractNode implements Node, Serializable {
!     /** 
!      * The beginning position of the tag in the line
!      */
!     protected int nodeBegin;

!     /**
!      * The ending position of the tag in the line
!      */
!     protected int nodeEnd;

!     /**
!      * The parent of this node.
!      */
!     protected Node parent;

      /**
***************
*** 62,214 ****
       * @param end The ending position of the node.
       */
! 	public AbstractNode (int begin, int end)
      {
! 		nodeBegin = begin;
! 		nodeEnd   = end;
          parent = null;
! 	}

! 	/**
! 	 * Returns a string representation of the node. This is an important method, it allows a simple string transformation
! 	 * of a web page, regardless of a node.<br>
! 	 * Typical application code (for extracting only the text from a web page) would then be simplified to  :<br>
! 	 * <pre>
! 	 * Node node;
! 	 * for (Enumeration e = parser.elements();e.hasMoreElements();) {
! 	 *    node = (Node)e.nextElement();
! 	 *    System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string
! 	 * }
! 	 * </pre>
! 	 */
! 	public abstract String toPlainTextString();

! 	/**
! 	 * This method will make it easier when using html parser to reproduce html pages (with or without modifications)
! 	 * Applications reproducing html can use this method on nodes which are to be used or transferred as they were 
! 	 * recieved, with the original html
! 	 */
! 	public abstract String toHtml();

! 	/**
! 	 * Return the string representation of the node.
! 	 * Subclasses must define this method, and this is typically to be used in the manner<br>
! 	 * <pre>System.out.println(node)</pre>
! 	 * @return java.lang.String
! 	 */
! 	public abstract String toString();

! 	/**
! 	 * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! 	 * satisfies the filtering criteria. 
! 	 * 
! 	 * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! 	 * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! 	 * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! 	 * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! 	 * to specific tags, and is not a very clean approach. 
! 	 * 
! 	 * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! 	 * like :
! 	 * <pre>
! 	 * NodeList collectionList = new NodeList(); 
! 	 * Node node; 
! 	 * String filter = LinkTag.LINK_TAG_FILTER; 
! 	 * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! 	 * 		node = e.nextNode();
! 	 * 		node.collectInto (collectionVector, filter); 
! 	 * }
! 	 * </pre>
! 	 * Thus, collectionList will hold all the link nodes, irrespective of how
! 	 * deep the links are embedded. This of course implies that tags must
! 	 * fulfill their responsibilities toward honouring certain filters.
! 	 * 
! 	 * Important: In order to keep performance optimal, do not create you own filter strings, as 
! 	 * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not
! 	 * make calls like : 
! 	 * collectInto(collectionList,"-l"), instead, make calls only like :
! 	 * collectInto(collectionList,LinkTag.LINK_TAG_FILTER).
! 	 * 
! 	 * To find out if your desired tag has filtering support, check the API of the tag.
! 	 */
! 	public abstract void collectInto(NodeList collectionList, String filter);

! 	/**
! 	 * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! 	 * satisfies the filtering criteria. 
! 	 * 
! 	 * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! 	 * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! 	 * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! 	 * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! 	 * to specific tags, and is not a very clean approach. 
! 	 * 
! 	 * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! 	 * like :
! 	 * <pre>
! 	 * NodeList collectionList = new NodeList(); 
! 	 * Node node; 
! 	 * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! 	 * 		node = e.nextNode();
! 	 * 		node.collectInto (collectionVector, LinkTag.class);
! 	 * }
! 	 * </pre>
! 	 * Thus, collectionList will hold all the link nodes, irrespective of how
! 	 * deep the links are embedded. 
! 	 */
! 	public void collectInto(NodeList collectionList, Class nodeType) {
! 		if (nodeType.getName().equals(this.getClass().getName())) {
! 			collectionList.add(this);
! 		}
! 	}

! 	/**
! 	 * Returns the beginning position of the tag.
! 	 */
! 	public int elementBegin() {
! 		return nodeBegin;
! 	}

! 	/**
! 	 * Returns the ending position fo the tag
! 	 */
! 	public int elementEnd() {
! 		return nodeEnd;
! 	}

! 	public abstract void accept(Object visitor);

! 	/**
! 	 * @deprecated - use toHtml() instead
! 	 */
! 	public final String toHTML() {
! 		return toHtml();
! 	}
! 	
! 	/**
! 	 * Get the parent of this node.
       * This will always return null when parsing without scanners,
       * i.e. if semantic parsing was not performed.
       * The object returned from this method can be safely cast to a <code>CompositeTag</code>.
! 	 * @return The parent of this node, if it's been set, <code>null</code> otherwise.
! 	 */
! 	public Node getParent ()
      {
! 		return (parent);
! 	}

      /**
! 	 * Sets the parent of this node.
! 	 * @param node The node that contains this node. Must be a <code>CompositeTag</code>.
! 	 */
! 	public void setParent (Node node)
      {
! 		parent = node;
! 	}
! 	
      /**
       * Get the children of this node.
       * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise.
       */
! 	public NodeList getChildren ()
      {
          return (children);
--- 62,214 ----
       * @param end The ending position of the node.
       */
!     public AbstractNode (int begin, int end)
      {
!         nodeBegin = begin;
!         nodeEnd   = end;
          parent = null;
!     }

!     /**
!      * Returns a string representation of the node. This is an important method, it allows a simple string transformation
!      * of a web page, regardless of a node.<br>
!      * Typical application code (for extracting only the text from a web page) would then be simplified to  :<br>
!      * <pre>
!      * Node node;
!      * for (Enumeration e = parser.elements();e.hasMoreElements();) {
!      *    node = (Node)e.nextElement();
!      *    System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string
!      * }
!      * </pre>
!      */
!     public abstract String toPlainTextString();

!     /**
!      * This method will make it easier when using html parser to reproduce html pages (with or without modifications)
!      * Applications reproducing html can use this method on nodes which are to be used or transferred as they were 
!      * recieved, with the original html
!      */
!     public abstract String toHtml();

!     /**
!      * Return the string representation of the node.
!      * Subclasses must define this method, and this is typically to be used in the manner<br>
!      * <pre>System.out.println(node)</pre>
!      * @return java.lang.String
!      */
!     public abstract String toString();

! /**
! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! * satisfies the filtering criteria. 
! * 
! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! * to specific tags, and is not a very clean approach. 
! * 
! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! * like :
! * <pre>
! * NodeList collectionList = new NodeList(); 
! * Node node; 
! * String filter = LinkTag.LINK_TAG_FILTER; 
! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! * node = e.nextNode();
! * node.collectInto (collectionVector, filter); 
! * }
! * </pre>
! * Thus, collectionList will hold all the link nodes, irrespective of how
! * deep the links are embedded. This of course implies that tags must
! * fulfill their responsibilities toward honouring certain filters.
! * 
! * Important: In order to keep performance optimal, do not create you own filter strings, as 
! * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not
! * make calls like : 
! * collectInto(collectionList,"-l"), instead, make calls only like :
! * collectInto(collectionList,LinkTag.LINK_TAG_FILTER).
! * 
! * To find out if your desired tag has filtering support, check the API of the tag.
! */
! public abstract void collectInto(NodeList collectionList, String filter);

! /**
! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! * satisfies the filtering criteria. 
! * 
! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! * to specific tags, and is not a very clean approach. 
! * 
! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! * like :
! * <pre>
! * NodeList collectionList = new NodeList(); 
! * Node node; 
! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! * node = e.nextNode();
! * node.collectInto (collectionVector, LinkTag.class);
! * }
! * </pre>
! * Thus, collectionList will hold all the link nodes, irrespective of how
! * deep the links are embedded. 
! */
! public void collectInto(NodeList collectionList, Class nodeType) {
! if (nodeType.getName().equals(this.getClass().getName())) {
! collectionList.add(this);
! }
! }

!     /**
!      * Returns the beginning position of the tag.
!      */
!     public int elementBegin() {
!         return nodeBegin;
!     }

!     /**
!      * Returns the ending position fo the tag
!      */
!     public int elementEnd() {
!         return nodeEnd;
!     }

!     public abstract void accept(Object visitor);

!     /**
!      * @deprecated - use toHtml() instead
!      */
!     public final String toHTML() {
!         return toHtml();
!     }
!     
!     /**
!      * Get the parent of this node.
       * This will always return null when parsing without scanners,
       * i.e. if semantic parsing was not performed.
       * The object returned from this method can be safely cast to a <code>CompositeTag</code>.
!      * @return The parent of this node, if it's been set, <code>null</code> otherwise.
!      */
!     public Node getParent ()
      {
!         return (parent);
!     }

      /**
!      * Sets the parent of this node.
!      * @param node The node that contains this node. Must be a <code>CompositeTag</code>.
!      */
!     public void setParent (Node node)
      {
!         parent = node;
!     }
!     
      /**
       * Get the children of this node.
       * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise.
       */
!     public NodeList getChildren ()
      {
          return (children);
***************
*** 219,223 ****
       * @param children The new list of children this node contains.
       */
! 	public void setChildren (NodeList children)
      {
          this.children = children;
--- 219,223 ----
       * @param children The new list of children this node contains.
       */
!     public void setChildren (NodeList children)
      {
          this.children = children;
***************
*** 225,241 ****

      /**
! 	 * Returns the text of the string line
! 	 */
! 	public String getText() {
! 		return null;
! 	}
! 	
! 	/**
! 	 * Sets the string contents of the node.
! 	 * @param text The new text for the node.
! 	 */
! 	public void setText(String text) {

! 	}

  }
--- 225,241 ----

      /**
!      * Returns the text of the string line
!      */
!     public String getText() {
!         return null;
!     }
!     
!     /**
!      * Sets the string contents of the node.
!      * @param text The new text for the node.
!      */
!     public void setText(String text) {

!     }

  }

Index: Node.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -d -r1.36 -r1.37
*** Node.java	24 Aug 2003 21:59:41 -0000	1.36
--- Node.java	3 Sep 2003 23:36:18 -0000	1.37
***************
*** 34,146 ****

public interface Node {
! 	/**
! 	 * Returns a string representation of the node. This is an important method, it allows a simple string transformation
! 	 * of a web page, regardless of a node. 
! 	 * Typical application code (for extracting only the text from a web page) would then be simplified to : 
! 	 * <pre>
! 	 * Node node;
! 	 * for (Enumeration e = parser.elements();e.hasMoreElements();) {
! 	 * node = (Node)e.nextElement();
! 	 * System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string
! 	 * }
! 	 * </pre>
! 	 */
! 	public abstract String toPlainTextString();
! 	/**
! 	 * This method will make it easier when using html parser to reproduce html pages (with or without modifications)
! 	 * Applications reproducing html can use this method on nodes which are to be used or transferred as they were 
! 	 * recieved, with the original html
! 	 */
! 	public abstract String toHtml();
! 	/**
! 	 * Return the string representation of the node.
! 	 * Subclasses must define this method, and this is typically to be used in the manner 
! 	 * <pre>System.out.println(node)</pre>
! 	 * @return java.lang.String
! 	 */
! 	public abstract String toString();
! 	/**
! 	 * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! 	 * satisfies the filtering criteria. 
! 	 * 
! 	 * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! 	 * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! 	 * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! 	 * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! 	 * to specific tags, and is not a very clean approach. 
! 	 * 
! 	 * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! 	 * like :
! 	 * <pre>
! 	 * NodeList collectionList = new NodeList(); 
! 	 * Node node; 
! 	 * String filter = LinkTag.LINK_TAG_FILTER; 
! 	 * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! 	 * 		node = e.nextNode();
! 	 * 		node.collectInto (collectionVector, filter); 
! 	 * }
! 	 * </pre>
! 	 * Thus, collectionList will hold all the link nodes, irrespective of how
! 	 * deep the links are embedded. This of course implies that tags must
! 	 * fulfill their responsibilities toward honouring certain filters.
! 	 * 
! 	 * Important: In order to keep performance optimal, do not create you own filter strings, as 
! 	 * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not
! 	 * make calls like : 
! 	 * collectInto(collectionList,"-l"), instead, make calls only like :
! 	 * collectInto(collectionList,LinkTag.LINK_TAG_FILTER).
! 	 * 
! 	 * To find out if your desired tag has filtering support, check the API of the tag.
! 	 */
! 	public abstract void collectInto(NodeList collectionList, String filter);
! 	/**
! 	 * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! 	 * satisfies the filtering criteria. 
! 	 * 
! 	 * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! 	 * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! 	 * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! 	 * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! 	 * to specific tags, and is not a very clean approach. 
! 	 * 
! 	 * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! 	 * like :
! 	 * <pre>
! 	 * NodeList collectionList = new NodeList(); 
! 	 * Node node; 
! 	 * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! 	 * 		node = e.nextNode();
! 	 * 		node.collectInto (collectionVector, LinkTag.class);
! 	 * }
! 	 * </pre>
! 	 * Thus, collectionList will hold all the link nodes, irrespective of how
! 	 * deep the links are embedded. 
! 	 */
! 	public abstract void collectInto(NodeList collectionList, Class nodeType);
! 	/**
! 	 * Returns the beginning position of the tag.
! 	 */
! 	public abstract int elementBegin();
! 	/**
! 	 * Returns the ending position fo the tag
! 	 */
! 	public abstract int elementEnd();

! 	public abstract void accept(Object visitor);

! 	/**
! 	 * Get the parent of this node.
       * This will always return null when parsing without scanners,
       * i.e. if semantic parsing was not performed.
       * The object returned from this method can be safely cast to a <code>CompositeTag</code>.
! 	 * @return The parent of this node, if it's been set, <code>null</code> otherwise.
! 	 */
! 	public abstract Node getParent ();

      /**
! 	 * Sets the parent of this node.
! 	 * @param node The node that contains this node. Must be a <code>CompositeTag</code>.
! 	 */
! 	public abstract void setParent (Node node);

      /**
--- 34,146 ----

public interface Node {
! /**
! * Returns a string representation of the node. This is an important method, it allows a simple string transformation
! * of a web page, regardless of a node. 
! * Typical application code (for extracting only the text from a web page) would then be simplified to : 
! * <pre>
! * Node node;
! * for (Enumeration e = parser.elements();e.hasMoreElements();) {
! * node = (Node)e.nextElement();
! * System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string
! * }
! * </pre>
! */
! public abstract String toPlainTextString();
! /**
! * This method will make it easier when using html parser to reproduce html pages (with or without modifications)
! * Applications reproducing html can use this method on nodes which are to be used or transferred as they were 
! * recieved, with the original html
! */
! public abstract String toHtml();
! /**
! * Return the string representation of the node.
! * Subclasses must define this method, and this is typically to be used in the manner 
! * <pre>System.out.println(node)</pre>
! * @return java.lang.String
! */
! public abstract String toString();
! /**
! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! * satisfies the filtering criteria. 
! * 
! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! * to specific tags, and is not a very clean approach. 
! * 
! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! * like :
! * <pre>
! * NodeList collectionList = new NodeList(); 
! * Node node; 
! * String filter = LinkTag.LINK_TAG_FILTER; 
! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! * node = e.nextNode();
! * node.collectInto (collectionVector, filter); 
! * }
! * </pre>
! * Thus, collectionList will hold all the link nodes, irrespective of how
! * deep the links are embedded. This of course implies that tags must
! * fulfill their responsibilities toward honouring certain filters.
! * 
! * Important: In order to keep performance optimal, do not create you own filter strings, as 
! * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not
! * make calls like : 
! * collectInto(collectionList,"-l"), instead, make calls only like :
! * collectInto(collectionList,LinkTag.LINK_TAG_FILTER).
! * 
! * To find out if your desired tag has filtering support, check the API of the tag.
! */
! public abstract void collectInto(NodeList collectionList, String filter);
! /**
! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node
! * satisfies the filtering criteria. 
! * 
! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection
! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it
! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links
! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down
! * to specific tags, and is not a very clean approach. 
! * 
! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look 
! * like :
! * <pre>
! * NodeList collectionList = new NodeList(); 
! * Node node; 
! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) {
! * node = e.nextNode();
! * node.collectInto (collectionVector, LinkTag.class);
! * }
! * </pre>
! * Thus, collectionList will hold all the link nodes, irrespective of how
! * deep the links are embedded. 
! */
! public abstract void collectInto(NodeList collectionList, Class nodeType);
! /**
! * Returns the beginning position of the tag.
! */
! public abstract int elementBegin();
! /**
! * Returns the ending position fo the tag
! */
! public abstract int elementEnd();

!     public abstract void accept(Object visitor);

!     /**
!      * Get the parent of this node.
       * This will always return null when parsing without scanners,
       * i.e. if semantic parsing was not performed.
       * The object returned from this method can be safely cast to a <code>CompositeTag</code>.
!      * @return The parent of this node, if it's been set, <code>null</code> otherwise.
!      */
!     public abstract Node getParent ();

      /**
!      * Sets the parent of this node.
!      * @param node The node that contains this node. Must be a <code>CompositeTag</code>.
!      */
!     public abstract void setParent (Node node);

      /**
***************
*** 148,152 ****
       * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise.
       */
! 	public abstract NodeList getChildren ();

      /**
--- 148,152 ----
       * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise.
       */
!     public abstract NodeList getChildren ();

      /**
***************
*** 154,168 ****
       * @param children The new list of children this node contains.
       */
! 	public abstract void setChildren (NodeList children);

! 	/**
! 	 * Returns the text of the string line
! 	 */
! 	public String getText();		
! 	
! 	/**
! 	 * Sets the string contents of the node.
! 	 * @param text The new text for the node.
! 	 */
! 	public void setText(String text);		
  }
--- 154,168 ----
       * @param children The new list of children this node contains.
       */
!     public abstract void setChildren (NodeList children);

!     /**
!      * Returns the text of the string line
!      */
!     public String getText();        
!     
!     /**
!      * Sets the string contents of the node.
!      * @param text The new text for the node.
!      */
!     public void setText(String text);       
  }

Index: NodeReader.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/NodeReader.java,v
retrieving revision 1.40
retrieving revision 1.41
diff -C2 -d -r1.40 -r1.41
*** NodeReader.java	24 Aug 2003 21:59:41 -0000	1.40
--- NodeReader.java	3 Sep 2003 23:36:18 -0000	1.41
***************
*** 52,98 ****
  public class NodeReader extends BufferedReader
  {
! 	public static final String DECIPHER_ERROR="NodeReader.readElement() : Error occurred while trying to decipher the tag using scanners";
! 	protected int posInLine=-1;
! 	protected String line;
! 	protected Node node = null;
! 	protected TagScanner previousOpenScanner = null;
! 	protected String url;
! 	private Parser parser;
! 	private int lineCount;
! 	private String previousLine;
! 	private StringParser stringParser = new StringParser();
! 	private RemarkNodeParser remarkNodeParser = new RemarkNodeParser();
! 	private NodeList nextParsedNode = new NodeList();
! 	private boolean dontReadNextLine=false;
! 	/**
! 	 * The constructor takes in a reader object, it's length and the url to be read.
! 	 */
! 	public NodeReader(Reader in,int len,String url)
! 	{
! 		super(in, len);
! 		this.url = url;
! 		this.parser = null;		
! 		this.lineCount = 1;
! 	}
! 	/**
! 	 * This constructor basically overrides the existing constructor in the
! 	 * BufferedReader class.
       * The URL defaults to an empty string.
       * @see #NodeReader(Reader,int,String)
! 	 */

! 	public NodeReader(Reader in, int len)
! 	{
! 		this(in,len,"");
! 	}
! 	/**
! 	 * The constructor takes in a reader object, and the url to be read.
       * The buffer size defaults to 8192.
       * @see #NodeReader(Reader,int,String)
! 	 */
! 	public NodeReader(Reader in,String url)
! 	{
! 		this(in, 8192, url);
! 	}

      /**
--- 52,98 ----
  public class NodeReader extends BufferedReader
  {
!     public static final String DECIPHER_ERROR="NodeReader.readElement() : Error occurred while trying to decipher the tag using scanners";
!     protected int posInLine=-1;
!     protected String line;
!     protected Node node = null;
!     protected TagScanner previousOpenScanner = null;
!     protected String url;
!     private Parser parser;
!     private int lineCount;
!     private String previousLine;
!     private StringParser stringParser = new StringParser();
!     private RemarkNodeParser remarkNodeParser = new RemarkNodeParser();
!     private NodeList nextParsedNode = new NodeList();
!     private boolean dontReadNextLine=false;
!     /**
!      * The constructor takes in a reader object, it's length and the url to be read.
!      */
!     public NodeReader(Reader in,int len,String url)
!     {
!         super(in, len);
!         this.url = url;
!         this.parser = null;     
!         this.lineCount = 1;
!     }
!     /**
!      * This constructor basically overrides the existing constructor in the
!      * BufferedReader class.
       * The URL defaults to an empty string.
       * @see #NodeReader(Reader,int,String)
!      */

!     public NodeReader(Reader in, int len)
!     {
!         this(in,len,"");
!     }
!     /**
!      * The constructor takes in a reader object, and the url to be read.
       * The buffer size defaults to 8192.
       * @see #NodeReader(Reader,int,String)
!      */
!     public NodeReader(Reader in,String url)
!     {
!         this(in, 8192, url);
!     }

      /**
***************
*** 105,177 ****
      }

! 	/**
! 	 * This method is intended to be called only by scanners, when a situation of dirty html has arisen, 
! 	 * and action has been taken to correct the parsed tags. For e.g. if we have html of the form :
! 	 * <pre>
! 	 * <a href="somelink.html"><img src=...><td><tr><a href="someotherlink.html">...</a>
! 	 * </pre>
! 	 * Now to salvage the first link, we'd probably like to insert an end tag somewhere (typically before the
! 	 * second begin link tag). So that the parsing continues uninterrupted, we will need to change the existing
! 	 * line being parsed, to contain the end tag in it. 
! 	 */
! 	public void changeLine(String line) {
! 		this.line = line;
! 	}
! 	public String getCurrentLine() {
! 		return line;
! 	}
! 	/**
! 	 * Get the last line number that the reader has read
! 	 * @return int last line number read by the reader
! 	 */ 
! 	public int getLastLineNumber() {
! 		return lineCount-1;
! 	}

! 	/**
! 	 * This method is useful when designing your own scanners. You might need to find out what is the location where the
! 	 * reader has stopped last.
! 	 * @return int Last position read by the reader
! 	 */
! 	public int getLastReadPosition() {
! 		if (node!=null) return node.elementEnd(); else
! 		return 0;
! 	}

! 	/*
! 	 * Read the next line
! 	 * @return String containing the line
! 	 */
! 	public String getNextLine()
! 	{
! 		try
! 		{
! 			previousLine = line;
! 			line = readLine();
! 			if (line!=null)
! 				lineCount++;
! 			posInLine = 0;
! 			return line;
! 		}
! 		catch (IOException e)
! 		{
! 			System.err.println("I/O Exception occurred while reading!");
! 		}
! 		return null;
! 	}
! 	/**
! 	 * Returns the parser object for which this reader exists
! 	 * @return org.htmlparser.Parser
! 	 */
! 	public Parser getParser() {
! 		return parser;
! 	}
! 	/**
! 	 * Gets the previousOpenScanner.
! 	 * @return Returns a TagScanner
! 	 */
! 	public TagScanner getPreviousOpenScanner() {
! 		return previousOpenScanner;
! 	}

      /**
--- 105,177 ----
      }

! /**
! * This method is intended to be called only by scanners, when a situation of dirty html has arisen, 
! * and action has been taken to correct the parsed tags. For e.g. if we have html of the form :
! * <pre>
! * <a href="somelink.html"><img src=...><td><tr><a href="someotherlink.html">...</a>
! * </pre>
! * Now to salvage the first link, we'd probably like to insert an end tag somewhere (typically before the
! * second begin link tag). So that the parsing continues uninterrupted, we will need to change the existing
! * line being parsed, to contain the end tag in it. 
! */
! public void changeLine(String line) {
! this.line = line;
! }
! public String getCurrentLine() {
! return line;
! }
! /**
! * Get the last line number that the reader has read
! * @return int last line number read by the reader
! */ 
! public int getLastLineNumber() {
! return lineCount-1;
! }

!     /**
!      * This method is useful when designing your own scanners. You might need to find out what is the location where the
!      * reader has stopped last.
!      * @return int Last position read by the reader
!      */
!     public int getLastReadPosition() {
!         if (node!=null) return node.elementEnd(); else
!         return 0;
!     }

!     /*
!      * Read the next line
!      * @return String containing the line
!      */
!     public String getNextLine()
!     {
!         try
!         {
!             previousLine = line;
!             line = readLine();
!             if (line!=null)
!                 lineCount++;
!             posInLine = 0;
!             return line;
!         }
!         catch (IOException e)
!         {
!             System.err.println("I/O Exception occurred while reading!");
!         }
!         return null;
!     }
!     /**
!      * Returns the parser object for which this reader exists
!      * @return org.htmlparser.Parser
!      */
!     public Parser getParser() {
!         return parser;
!     }
!     /**
!      * Gets the previousOpenScanner.
!      * @return Returns a TagScanner
!      */
!     public TagScanner getPreviousOpenScanner() {
!         return previousOpenScanner;
!     }

      /**
***************
*** 202,241 ****

      /**
! 	 * Read the next element
! 	 * @return Node - The next node
!  	 */
! 	public Node readElement() throws ParserException
      {
          return (readElement (false));
      }

! 	/**
! 	 * Read the next element
       * @param balance_quotes If <code>true</code> string nodes are parsed
       * paying attention to single and double quotes, such that tag-like
       * strings are ignored if they are quoted.
! 	 * @return Node - The next node
!  	 */
! 	public Node readElement(boolean balance_quotes) throws ParserException
! 	{
! 		try {
! 			if (nextParsedNode.size()>0) {
! 				node = nextParsedNode.elementAt(0);
! 				nextParsedNode.remove(0);
! 				return node; 
! 			}
! 			if (readNextLine()) {
! 				do
! 				{
! 					line = getNextLine();
! 				}
! 				while (line!=null && line.length()==0);
! 	
! 			}
              else if (dontReadNextLine) {
!             	dontReadNextLine = false;
              } else
                  posInLine = getLastReadPosition() + 1;
! 			if (line==null)
                  return null;

--- 202,241 ----

      /**
!      * Read the next element
!      * @return Node - The next node
!      */
!     public Node readElement() throws ParserException
      {
          return (readElement (false));
      }

!     /**
!      * Read the next element
       * @param balance_quotes If <code>true</code> string nodes are parsed
       * paying attention to single and double quotes, such that tag-like
       * strings are ignored if they are quoted.
!      * @return Node - The next node
!      */
!     public Node readElement(boolean balance_quotes) throws ParserException
!     {
!         try {
!             if (nextParsedNode.size()>0) {
!                 node = nextParsedNode.elementAt(0);
!                 nextParsedNode.remove(0);
!                 return node; 
!             }
!             if (readNextLine()) {
!                 do
!                 {
!                     line = getNextLine();
!                 }
!                 while (line!=null && line.length()==0);
!     
!             }
              else if (dontReadNextLine) {
!                 dontReadNextLine = false;
              } else
                  posInLine = getLastReadPosition() + 1;
!             if (line==null)
                  return null;

***************
*** 255,263 ****
                      }
                      catch (Exception e)
!                     {			
                          StringBuffer msgBuffer = new StringBuffer();
                          msgBuffer.append(DECIPHER_ERROR+"\n" +
!                         	"    Tag being processed : "+tag.getTagName()+"\n" +
!                         	"    Current Tag Line : "+tag.getTagLine()
                          ); 
                          appendLineDetails(msgBuffer);
--- 255,263 ----
                      }
                      catch (Exception e)
!                     {           
                          StringBuffer msgBuffer = new StringBuffer();
                          msgBuffer.append(DECIPHER_ERROR+"\n" +
!                             "    Tag being processed : "+tag.getTagName()+"\n" +
!                             "    Current Tag Line : "+tag.getTagLine()
                          ); 
                          appendLineDetails(msgBuffer);
***************
*** 277,400 ****
                  if (node!=null) return node;
              }
! 		
! 			return null;
! 		}
          catch (ParserException pe)
          {
              throw pe;
          }
! 		catch (Exception e) {
! 			StringBuffer msgBuffer = new StringBuffer("NodeReader.readElement() : Error occurred while trying to read the next element,");
! 			StringWriter sw = new StringWriter();
! 			e.printStackTrace(new PrintWriter(sw));
! 			appendLineDetails(msgBuffer);
! 			msgBuffer.append("\n Caused by:\n").append(sw.getBuffer().toString ());
! 			ParserException ex = new ParserException(msgBuffer.toString(),e);
! 			parser.getFeedback().error(msgBuffer.toString(),ex);
! 			throw ex;			
! 		}
! 	}
! 	public void appendLineDetails(StringBuffer msgBuffer) {
! 		msgBuffer.append("\nat Line ");
! 		msgBuffer.append(getLineCount());
! 		msgBuffer.append(" : ");
! 		msgBuffer.append(getLine());
! 		msgBuffer.append("\nPrevious Line ").append(getLineCount()-1);
! 		msgBuffer.append(" : ").append(getPreviousLine());
! 	}
! 	/**
! 	 * Do we need to read the next line ?
! 	 * @return true - yes/ false - no
! 	 */
! 	protected boolean readNextLine()
! 	{
! 		if (dontReadNextLine) {
! 			return false;
! 		}
! 		if (posInLine==-1 || (line!=null && node.elementEnd()+1>=line.length()))
! 				return true;
! 		else return false;
! 	}
! 	/**
! 	 * The setParser method is used by the parser to put its own object into the reader. This happens internally,
! 	 * so this method is not generally for use by the developer or the user.
! 	 */
! 	public void setParser(Parser newParser) {
! 		parser = newParser;
! 	}
! 	/**
! 	 * Sets the previousOpenScanner.
! 	 * @param previousOpenScanner The previousOpenScanner to set
! 	 */
! 	public void setPreviousOpenScanner(TagScanner previousOpenScanner) {
! 		this.previousOpenScanner = previousOpenScanner;
! 	}
! 	
! 	/**
! 	 * @param lineSeparator New Line separator to be used
! 	 */
! 	public static void setLineSeparator(String lineSeparator)
! 	{
! 		Parser.setLineSeparator(lineSeparator);	
! 	}
! 	
! 	/**
! 	 * Gets the line separator that is being used
! 	 * @return String
! 	 */ 
! 	public static String getLineSeparator()
! 	{
! 		return (Parser.getLineSeparator());
! 	}
! 	/**
! 	 * Returns the lineCount.
! 	 * @return int
! 	 */
! 	public int getLineCount() {
! 		return lineCount;
! 	}

! 	/**
! 	 * Returns the previousLine.
! 	 * @return String
! 	 */
! 	public String getPreviousLine() {
! 		return previousLine;
! 	}

! 	/**
! 	 * Returns the line.
! 	 * @return String
! 	 */
! 	public String getLine() {
! 		return line;
! 	}

! 	/**
! 	 * Sets the lineCount.
! 	 * @param lineCount The lineCount to set
! 	 */
! 	public void setLineCount(int lineCount) {
! 		this.lineCount = lineCount;
! 	}

! 	/**
! 	 * Sets the posInLine.
! 	 * @param posInLine The posInLine to set
! 	 */
! 	public void setPosInLine(int posInLine) {
! 		this.posInLine = posInLine;
! 	}

! 	public void reset() throws IOException {
! 		super.reset();
! 		lineCount = 1;
! 		posInLine = -1;
! 	}

! 	public StringParser getStringParser() {
! 		return stringParser;
! 	}

      /**
--- 277,400 ----
                  if (node!=null) return node;
              }
!         
!             return null;
!         }
          catch (ParserException pe)
          {
              throw pe;
          }
!         catch (Exception e) {
!             StringBuffer msgBuffer = new StringBuffer("NodeReader.readElement() : Error occurred while trying to read the next element,");
!             StringWriter sw = new StringWriter();
!             e.printStackTrace(new PrintWriter(sw));
!             appendLineDetails(msgBuffer);
!             msgBuffer.append("\n Caused by:\n").append(sw.getBuffer().toString ());
!             ParserException ex = new ParserException(msgBuffer.toString(),e);
!             parser.getFeedback().error(msgBuffer.toString(),ex);
!             throw ex;           
!         }
!     }
!     public void appendLineDetails(StringBuffer msgBuffer) {
!         msgBuffer.append("\nat Line ");
!         msgBuffer.append(getLineCount());
!         msgBuffer.append(" : ");
!         msgBuffer.append(getLine());
!         msgBuffer.append("\nPrevious Line ").append(getLineCount()-1);
!         msgBuffer.append(" : ").append(getPreviousLine());
!     }
!     /**
!      * Do we need to read the next line ?
!      * @return true - yes/ false - no
!      */
!     protected boolean readNextLine()
!     {
!         if (dontReadNextLine) {
!             return false;
!         }
!         if (posInLine==-1 || (line!=null && node.elementEnd()+1>=line.length()))
!                 return true;
!         else return false;
!     }
!     /**
!      * The setParser method is used by the parser to put its own object into the reader. This happens internally,
!      * so this method is not generally for use by the developer or the user.
!      */
!     public void setParser(Parser newParser) {
!         parser = newParser;
!     }
!     /**
!      * Sets the previousOpenScanner.
!      * @param previousOpenScanner The previousOpenScanner to set
!      */
!     public void setPreviousOpenScanner(TagScanner previousOpenScanner) {
!         this.previousOpenScanner = previousOpenScanner;
!     }
!     
!     /**
!      * @param lineSeparator New Line separator to be used
!      */
!     public static void setLineSeparator(String lineSeparator)
!     {
!         Parser.setLineSeparator(lineSeparator); 
!     }
!     
!     /**
!      * Gets the line separator that is being used
!      * @return String
!      */ 
!     public static String getLineSeparator()
!     {
!         return (Parser.getLineSeparator());
!     }
!     /**
!      * Returns the lineCount.
!      * @return int
!      */
!     public int getLineCount() {
!         return lineCount;
!     }

!     /**
!      * Returns the previousLine.
!      * @return String
!      */
!     public String getPreviousLine() {
!         return previousLine;
!     }

!     /**
!      * Returns the line.
!      * @return String
!      */
!     public String getLine() {
!         return line;
!     }

!     /**
!      * Sets the lineCount.
!      * @param lineCount The lineCount to set
!      */
!     public void setLineCount(int lineCount) {
!         this.lineCount = lineCount;
!     }

!     /**
!      * Sets the posInLine.
!      * @param posInLine The posInLine to set
!      */
!     public void setPosInLine(int posInLine) {
!         this.posInLine = posInLine;
!     }

!     public void reset() throws IOException {
!         super.reset();
!         lineCount = 1;
!         posInLine = -1;
!     }

!     public StringParser getStringParser() {
!         return stringParser;
!     }

      /**
***************
*** 404,417 ****
       * @param nextParsedNode The node that will be returned next by the reader.
       */
! 	public void addNextParsedNode(Node nextParsedNode) {
! 		this.nextParsedNode.prepend(nextParsedNode);		
! 	}
! 	
! 	public boolean isDontReadNextLine() {
! 		return dontReadNextLine;
! 	}

! 	public void setDontReadNextLine(boolean dontReadNextLine) {
! 		this.dontReadNextLine = dontReadNextLine;
! 	}
  }
--- 404,417 ----
       * @param nextParsedNode The node that will be returned next by the reader.
       */
!     public void addNextParsedNode(Node nextParsedNode) {
!         this.nextParsedNode.prepend(nextParsedNode);        
!     }
!     
!     public boolean isDontReadNextLine() {
!         return dontReadNextLine;
!     }

!     public void setDontReadNextLine(boolean dontReadNextLine) {
!         this.dontReadNextLine = dontReadNextLine;
!     }
  }

Index: RemarkNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNode.java,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** RemarkNode.java	24 Aug 2003 21:59:41 -0000	1.27
--- RemarkNode.java	3 Sep 2003 23:36:18 -0000	1.28
***************
*** 38,88 ****
  public class RemarkNode extends AbstractNode
  {
! 	public final static String REMARK_NODE_FILTER="-r";
! 	
! 	/**
! 	 * Tag contents will have the contents of the comment tag.
!    	 */
! 	String tagContents;
! 	/**
! 	 * The HTMLRemarkTag is constructed by providing the beginning posn, ending posn
! 	 * and the tag contents.
! 	 * @param nodeBegin beginning position of the tag
! 	 * @param nodeEnd ending position of the tag
! 	 * @param tagContents contents of the remark tag
! 	 */
! 	public RemarkNode(int nodeBegin, int nodeEnd, String tagContents)
! 	{
! 		super(nodeBegin,nodeEnd);
! 		this.tagContents = tagContents;
! 	}

! 	/** 
! 	 * Returns the text contents of the comment tag.
! 	 */
! 	public String getText()
! 	{
! 		return tagContents;
! 	}
! 	public String toPlainTextString() {
! 		return tagContents;
! 	}
! 	public String toHtml() {
! 		return "<!--"+tagContents+"-->";
! 	}
! 	/**
! 	 * Print the contents of the remark tag.
! 	 */
! 	public String toString()
! 	{
! 		return "Comment Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n";
! 	}

! 	public void collectInto(NodeList collectionList, String filter) {
! 		if (filter==REMARK_NODE_FILTER) collectionList.add(this);
! 	}

! 	public void accept(Object visitor) {
! 		((NodeVisitor)visitor).visitRemarkNode(this);
! 	}

  }
--- 38,88 ----
  public class RemarkNode extends AbstractNode
  {
!     public final static String REMARK_NODE_FILTER="-r";
!     
!     /**
!      * Tag contents will have the contents of the comment tag.
!      */
!     String tagContents;
!     /**
!      * The HTMLRemarkTag is constructed by providing the beginning posn, ending posn
!      * and the tag contents.
!      * @param nodeBegin beginning position of the tag
!      * @param nodeEnd ending position of the tag
!      * @param tagContents contents of the remark tag
!      */
!     public RemarkNode(int nodeBegin, int nodeEnd, String tagContents)
!     {
!         super(nodeBegin,nodeEnd);
!         this.tagContents = tagContents;
!     }

!     /** 
!      * Returns the text contents of the comment tag.
!      */
!     public String getText()
!     {
!         return tagContents;
!     }
!     public String toPlainTextString() {
!         return tagContents;
!     }
!     public String toHtml() {
!         return "<!--"+tagContents+"-->";
!     }
!     /**
!      * Print the contents of the remark tag.
!      */
!     public String toString()
!     {
!         return "Comment Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n";
!     }

!     public void collectInto(NodeList collectionList, String filter) {
!         if (filter==REMARK_NODE_FILTER) collectionList.add(this);
!     }

!     public void accept(Object visitor) {
!         ((NodeVisitor)visitor).visitRemarkNode(this);
!     }

  }

Index: StringNode.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v
retrieving revision 1.35
retrieving revision 1.36
diff -C2 -d -r1.35 -r1.36
*** StringNode.java	24 Aug 2003 21:59:41 -0000	1.35
--- StringNode.java	3 Sep 2003 23:36:18 -0000	1.36
***************
*** 38,93 ****
  public class StringNode extends AbstractNode
  {
! 	public static final String STRING_FILTER="-string";
! 	
! 	/**
! 	 * The text of the string.
! 	 */	
! 	protected StringBuffer textBuffer;

! 	/** 
! 	 * Constructor takes in the text string, beginning and ending posns.
! 	 * @param text The contents of the string line
! 	 * @param textBegin The beginning position of the string
! 	 * @param textEnd The ending position of the string
! 	 */
! 	public StringNode (StringBuffer text, int textBegin,int textEnd)
! 	{
! 		super(textBegin,textEnd);
! 		this.textBuffer = text;
! 	}

! 	/**
! 	 * Returns the text of the string line
! 	 */
! 	public String getText() {
! 		return textBuffer.toString();
! 	}
      /**
       * Sets the string contents of the node.
       * @param text The new text for the node.
       */
! 	public void setText(String text)
! 	{
! 		textBuffer = new StringBuffer (text);
! 	}
! 	
! 	public String toPlainTextString() {
! 		return textBuffer.toString();
! 	}
! 	
! 	public String toHtml() {
! 		return textBuffer.toString();
! 	}
! 	
! 	public String toString() {
! 		return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd();
! 	}
! 	
! 	public void collectInto(NodeList collectionList, String filter) {
! 		if (filter==STRING_FILTER) collectionList.add(this);
! 	}

! 	public void accept(Object visitor) {
! 		((NodeVisitor)visitor).visitStringNode(this);
! 	}
  }
--- 38,93 ----
  public class StringNode extends AbstractNode
  {
!     public static final String STRING_FILTER="-string";
!     
!     /**
!      * The text of the string.
!      */ 
!     protected StringBuffer textBuffer;

!     /** 
!      * Constructor takes in the text string, beginning and ending posns.
!      * @param text The contents of the string line
!      * @param textBegin The beginning position of the string
!      * @param textEnd The ending position of the string
!      */
!     public StringNode (StringBuffer text, int textBegin,int textEnd)
!     {
!         super(textBegin,textEnd);
!         this.textBuffer = text;
!     }

!     /**
!      * Returns the text of the string line
!      */
!     public String getText() {
!         return textBuffer.toString();
!     }
      /**
       * Sets the string contents of the node.
       * @param text The new text for the node.
       */
!     public void setText(String text)
!     {
!         textBuffer = new StringBuffer (text);
!     }
!     
!     public String toPlainTextString() {
!         return textBuffer.toString();
!     }
!     
!     public String toHtml() {
!         return textBuffer.toString();
!     }
!     
!     public String toString() {
!         return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd();
!     }
!     
!     public void collectInto(NodeList collectionList, String filter) {
!         if (filter==STRING_FILTER) collectionList.add(this);
!     }

!     public void accept(Object visitor) {
!         ((NodeVisitor)visitor).visitStringNode(this);
!     }
  }

Index: StringNodeFactory.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNodeFactory.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** StringNodeFactory.java	12 Jul 2003 00:33:59 -0000	1.1
--- StringNodeFactory.java	3 Sep 2003 23:36:18 -0000	1.2
***************
*** 8,70 ****

  public class StringNodeFactory implements Serializable {
! 	
! 	/**
! 	 * Flag to tell the parser to decode strings returned by StringNode's toPlainTextString.  
! 	 * Decoding occurs via the method, org.htmlparser.util.Translate.decode()
! 	 */
! 	private boolean shouldDecodeNodes = false;

! 	/**
! 	 * Flag to tell the parser to remove escape characters, like \n and \t, returned by StringNode's toPlainTextString.  
! 	 * Escape character removal occurs via the method, org.htmlparser.util.ParserUtils.removeEscapeCharacters()
! 	 */
! 	private boolean shouldRemoveEscapeCharacters = false;
! 	
! 	/**
! 	 * Flag to tell the parser to convert non breaking space 
! 	 * (i.e. \u00a0) to a space (" ").  If true, this will happen inside StringNode's toPlainTextString.  
! 	 */
! 	private boolean shouldConvertNonBreakingSpace = false;

! 	public Node createStringNode(
! 		StringBuffer textBuffer,
! 		int textBegin,
! 		int textEnd) {
! 		Node newNode = new StringNode(textBuffer, textBegin, textEnd);
! 		if (shouldDecodeNodes())
! 			newNode = new DecodingNode(newNode);
! 		if (shouldRemoveEscapeCharacters())
! 			newNode = new EscapeCharacterRemovingNode(newNode);
! 		if (shouldConvertNonBreakingSpace())
! 			newNode = new NonBreakingSpaceConvertingNode(newNode);
! 		return newNode;
! 	}
! 	
! 	/**
! 	 * Tells the parser to decode nodes using org.htmlparser.util.Translate.decode()
! 	 */
! 	public void setNodeDecoding(boolean shouldDecodeNodes) {
! 			this.shouldDecodeNodes = shouldDecodeNodes;
! 		}

! 	public boolean shouldDecodeNodes() {
! 		return shouldDecodeNodes;
! 	}

! 	public void setEscapeCharacterRemoval(boolean shouldRemoveEscapeCharacters) {
! 		this.shouldRemoveEscapeCharacters = shouldRemoveEscapeCharacters;
! 	}

! 	public boolean shouldRemoveEscapeCharacters() {
! 		return shouldRemoveEscapeCharacters;
! 	}

! 	public void setNonBreakSpaceConversion(boolean shouldConvertNonBreakSpace) {
! 		this.shouldConvertNonBreakingSpace = shouldConvertNonBreakSpace;
! 	}
! 	
! 	public boolean shouldConvertNonBreakingSpace() {
! 		return shouldConvertNonBreakingSpace;
! 	}	
  }
--- 8,70 ----

  public class StringNodeFactory implements Serializable {
!     
!     /**
!      * Flag to tell the parser to decode strings returned by StringNode's toPlainTextString.  
!      * Decoding occurs via the method, org.htmlparser.util.Translate.decode()
!      */
!     private boolean shouldDecodeNodes = false;

!     /**
!      * Flag to tell the parser to remove escape characters, like \n and \t, returned by StringNode's toPlainTextString.  
!      * Escape character removal occurs via the method, org.htmlparser.util.ParserUtils.removeEscapeCharacters()
!      */
!     private boolean shouldRemoveEscapeCharacters = false;
!     
!     /**
!      * Flag to tell the parser to convert non breaking space 
!      * (i.e. \u00a0) to a space (" ").  If true, this will happen inside StringNode's toPlainTextString.  
!      */
!     private boolean shouldConvertNonBreakingSpace = false;

!     public Node createStringNode(
!         StringBuffer textBuffer,
!         int textBegin,
!         int textEnd) {
!         Node newNode = new StringNode(textBuffer, textBegin, textEnd);
!         if (shouldDecodeNodes())
!             newNode = new DecodingNode(newNode);
!         if (shouldRemoveEscapeCharacters())
!             newNode = new EscapeCharacterRemovingNode(newNode);
!         if (shouldConvertNonBreakingSpace())
!             newNode = new NonBreakingSpaceConvertingNode(newNode);
!         return newNode;
!     }
!     
!     /**
!      * Tells the parser to decode nodes using org.htmlparser.util.Translate.decode()
!      */
!     public void setNodeDecoding(boolean shouldDecodeNodes) {
!             this.shouldDecodeNodes = shouldDecodeNodes;
!         }

!     public boolean shouldDecodeNodes() {
!         return shouldDecodeNodes;
!     }

!     public void setEscapeCharacterRemoval(boolean shouldRemoveEscapeCharacters) {
!         this.shouldRemoveEscapeCharacters = shouldRemoveEscapeCharacters;
!     }

!     public boolean shouldRemoveEscapeCharacters() {
!         return shouldRemoveEscapeCharacters;
!     }

!     public void setNonBreakSpaceConversion(boolean shouldConvertNonBreakSpace) {
!         this.shouldConvertNonBreakingSpace = shouldConvertNonBreakSpace;
!     }
!     
!     public boolean shouldConvertNonBreakingSpace() {
!         return shouldConvertNonBreakingSpace;
!     }   
  }

[Htmlparser-cvs] htmlparser/src/org/htmlparser AbstractNode.java,1.10,1.11 Node.java,1.36,1.37 NodeR

[Htmlparser-cvs] htmlparser/src/org/htmlparser AbstractNode.java,1.10,1.11 Node.java,1.36,1.37 NodeReader.java,1.40,1.41 RemarkNode.java,1.27,1.28 StringNode.java,1.35,1.36 StringNodeFactory.java,1.1,1.2