Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser Node.java,1.52,1.53 Parser.java,1.104,1.105 PrototypicalNodeFactory.java,1.14,1.15
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2005-04-24 17:48:37
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv5117/htmlparser/src/org/htmlparser Modified Files: Node.java Parser.java PrototypicalNodeFactory.java Log Message: Documentation revamp part three. Reworked some JavaDoc descriptions. Added "HTML Parser for dummies" introductory text. Removed checkstyle.jar and fit.jar (and it's cruft). Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.104 retrieving revision 1.105 diff -C2 -d -r1.104 -r1.105 *** Parser.java 5 Apr 2005 00:48:10 -0000 1.104 --- Parser.java 24 Apr 2005 17:48:27 -0000 1.105 *************** *** 117,121 **** * The floating point version number ({@value}). */ ! public final static double VERSION_NUMBER = 1.5 ; --- 117,121 ---- * The floating point version number ({@value}). */ ! public static final double VERSION_NUMBER = 1.5 ; *************** *** 124,128 **** * The type of version ({@value}). */ ! public final static String VERSION_TYPE = "Integration Build" ; --- 124,128 ---- * The type of version ({@value}). */ ! public static final String VERSION_TYPE = "Integration Build" ; *************** *** 131,146 **** * The date of the version ({@value}). */ ! public final static String VERSION_DATE = "Mar 13, 2005" ; /** * The display version ({@value}). */ ! public final static String ! VERSION_STRING = "" + VERSION_NUMBER + " (" + VERSION_TYPE + " " + VERSION_DATE + ")" ! ; ! ! // End of formatting /** --- 131,146 ---- * The date of the version ({@value}). */ ! public static final String VERSION_DATE = "Mar 13, 2005" ; + // End of formatting + /** * The display version ({@value}). */ ! public static final String VERSION_STRING = ! "" + VERSION_NUMBER ! + " (" + VERSION_TYPE + " " + VERSION_DATE + ")"; /** *************** *** 158,162 **** * Use this for no feedback. */ ! 
public static ParserFeedback noFeedback = new DefaultParserFeedback (DefaultParserFeedback.QUIET); /** --- 158,163 ---- * Use this for no feedback. */ ! public static final ParserFeedback DEVNULL = ! new DefaultParserFeedback (DefaultParserFeedback.QUIET); /** *************** *** 164,168 **** * Use this for output on <code>System.out</code>. */ ! public static ParserFeedback stdout = new DefaultParserFeedback (); // --- 165,169 ---- * Use this for output on <code>System.out</code>. */ ! public static final ParserFeedback STDOUT = new DefaultParserFeedback (); // *************** *** 243,247 **** public Parser () { ! this (new Lexer (new Page ("")), noFeedback); } --- 244,248 ---- public Parser () { ! this (new Lexer (new Page ("")), DEVNULL); } *************** *** 272,276 **** * method will be called so it need not be connected yet. * @param fb The object to use for message communication. ! * @throws ParserException If the creation of the underlying Lexer cannot be performed. */ public Parser (URLConnection connection, ParserFeedback fb) --- 273,278 ---- * method will be called so it need not be connected yet. * @param fb The object to use for message communication. ! * @throws ParserException If the creation of the underlying Lexer ! * cannot be performed. */ public Parser (URLConnection connection, ParserFeedback fb) *************** *** 283,287 **** /** * Creates a Parser object with the location of the resource (URL or file) ! * You would typically create a DefaultHTMLParserFeedback object and pass it in. * @see #Parser(URLConnection,ParserFeedback) * @param resourceLocn Either the URL or the filename (autodetects). --- 285,290 ---- /** * Creates a Parser object with the location of the resource (URL or file) ! * You would typically create a DefaultHTMLParserFeedback object and pass ! * it in. * @see #Parser(URLConnection,ParserFeedback) * @param resourceLocn Either the URL or the filename (autodetects). 
*************** *** 292,296 **** * @throws ParserException If the URL is invalid. */ ! public Parser (String resourceLocn, ParserFeedback feedback) throws ParserException { this (getConnectionManager ().openConnection (resourceLocn), feedback); --- 295,301 ---- * @throws ParserException If the URL is invalid. */ ! public Parser (String resourceLocn, ParserFeedback feedback) ! throws ! ParserException { this (getConnectionManager ().openConnection (resourceLocn), feedback); *************** *** 301,314 **** * A DefaultHTMLParserFeedback object is used for feedback. * @param resourceLocn Either the URL or the filename (autodetects). ! * @throws ParserException If the resourceLocn argument does not resolve to a valid page or file. */ public Parser (String resourceLocn) throws ParserException { ! this (resourceLocn, stdout); } /** * Construct a parser using the provided lexer. ! * A feedback object printing to {@link #stdout System.out} is used. * This would be used to create a parser for special cases where the * normal creation of a lexer on a URLConnection needs to be customized. --- 306,320 ---- * A DefaultHTMLParserFeedback object is used for feedback. * @param resourceLocn Either the URL or the filename (autodetects). ! * @throws ParserException If the resourceLocn argument does not resolve ! * to a valid page or file. */ public Parser (String resourceLocn) throws ParserException { ! this (resourceLocn, STDOUT); } /** * Construct a parser using the provided lexer. ! * A feedback object printing to {@link #STDOUT System.out} is used. * This would be used to create a parser for special cases where the * normal creation of a lexer on a URLConnection needs to be customized. *************** *** 317,321 **** public Parser (Lexer lexer) { ! this (lexer, stdout); } --- 323,327 ---- public Parser (Lexer lexer) { ! 
this (lexer, STDOUT); } *************** *** 325,337 **** * a special setup or negotiation conditioning beyond what is available * from the {@link #getConnectionManager ConnectionManager}. ! * A feedback object printing to {@link #stdout System.out} is used. * @see #Parser(URLConnection,ParserFeedback) * @param connection A fully conditioned connection. The connect() * method will be called so it need not be connected yet. ! * @throws ParserException If the creation of the underlying Lexer cannot be performed. */ public Parser (URLConnection connection) throws ParserException { ! this (connection, stdout); } --- 331,344 ---- * a special setup or negotiation conditioning beyond what is available * from the {@link #getConnectionManager ConnectionManager}. ! * A feedback object printing to {@link #STDOUT System.out} is used. * @see #Parser(URLConnection,ParserFeedback) * @param connection A fully conditioned connection. The connect() * method will be called so it need not be connected yet. ! * @throws ParserException If the creation of the underlying Lexer ! * cannot be performed. */ public Parser (URLConnection connection) throws ParserException { ! this (connection, STDOUT); } *************** *** 412,416 **** getLexer ().getPage ().setEncoding (encoding); } ! /** * Get the encoding for the page this parser is reading from. --- 419,423 ---- getLexer ().getPage ().setEncoding (encoding); } ! /** * Get the encoding for the page this parser is reading from. *************** *** 488,496 **** * Sets the feedback object used in scanning. * @param fb The new feedback object to use. If this is null a ! * {@link #noFeedback silent feedback object} is used. */ public void setFeedback (ParserFeedback fb) { ! mFeedback = (null == fb) ? noFeedback : fb; } --- 495,506 ---- * Sets the feedback object used in scanning. * @param fb The new feedback object to use. If this is null a ! * {@link #DEVNULL silent feedback object} is used. */ public void setFeedback (ParserFeedback fb) { ! 
if (null == fb) ! mFeedback = DEVNULL; ! else ! mFeedback = fb; } *************** *** 512,515 **** --- 522,534 ---- * This assumes support for a reset from the underlying * {@link org.htmlparser.lexer.Source} object. + * <p>This is cheaper (in terms of time) than resetting the URL, i.e. + * <pre> + * parser.setURL (parser.getURL ()); + * </pre> + * because the page is not refetched from the internet. + * <em>Note: the nodes returned on the second parse are new + * nodes and not the same nodes returned on the first parse. If you + * want the same nodes for re-use, collect them in a NodeList with + * {@link #parse(NodeFilter) parse(null)} and operate on the NodeList.</em> */ public void reset () *************** *** 552,562 **** * // ... * // process recursively (nodes within nodes) via getChildren() ! * NodeList list = tag.getChildren (); ! * if (null != list) ! * for (NodeIterator i = list.elements (); i.hasMoreElements (); ) * processMyNodes (i.nextNode ()); * } * } ! * * Parser parser = new Parser ("http://www.yahoo.com"); * for (NodeIterator i = parser.elements (); i.hasMoreElements (); ) --- 571,581 ---- * // ... * // process recursively (nodes within nodes) via getChildren() ! * NodeList nl = tag.getChildren (); ! * if (null != nl) ! * for (NodeIterator i = nl.elements (); i.hasMoreElements (); ) * processMyNodes (i.nextNode ()); * } * } ! * * Parser parser = new Parser ("http://www.yahoo.com"); * for (NodeIterator i = parser.elements (); i.hasMoreElements (); ) *************** *** 574,577 **** --- 593,620 ---- /** * Parse the given resource, using the filter provided. + * This can be used to extract information from specific nodes. + * When used with a <code>null</code> filter it returns an + * entire page which can then be modified and converted back to HTML + * (Note: the synthesis use-case is not handled very well; the parser + * is more often used to extract information from a web page). 
+ * <p>For example, to replace the entire contents of the HEAD with a + * single TITLE tag you could do this: + * <pre> + * NodeList nl = parser.parse (null); // here is your two node list + * NodeList heads = nl.extractAllNodesThatMatch (new TagNameFilter ("HEAD")) + * if (heads.size () > 0) // there may not be a HEAD tag + * { + * Head head = heads.elementAt (0); // there should be only one + * head.removeAll (); // clean out the contents + * Tag title = new TitleTag (); + * title.setTagName ("title"); + * title.setChildren (new NodeList (new TextNode ("The New Title"))); + * Tag title_end = new TitleTag (); + * title_end.setTagName ("/title"); + * title.setEndTag (title_end); + * head.add (title); + * } + * System.out.println (nl.toHtml ()); // output the modified HTML + * </pre> * @return The list of matching nodes (for a <code>null</code> * filter this is all the top level nodes). *************** *** 595,599 **** ret.add (node); } ! return (ret); } --- 638,642 ---- ret.add (node); } ! return (ret); } *************** *** 606,615 **** * page and <code>finishedParsing()</code> is called after the processing. * @param visitor The visitor to visit all nodes with. ! * @throws ParserException If a parse error occurs while traversing the page with the visitor. */ ! public void visitAllNodesWith (NodeVisitor visitor) throws ParserException { Node node; visitor.beginParsing(); ! for (NodeIterator e = elements();e.hasMoreNodes();) { node = e.nextNode(); node.accept(visitor); --- 649,661 ---- * page and <code>finishedParsing()</code> is called after the processing. * @param visitor The visitor to visit all nodes with. ! * @throws ParserException If a parse error occurs while traversing ! * the page with the visitor. */ ! public void visitAllNodesWith (NodeVisitor visitor) throws ParserException ! { Node node; visitor.beginParsing(); ! for (NodeIterator e = elements(); e.hasMoreNodes(); ) ! 
{ node = e.nextNode(); node.accept(visitor); *************** *** 621,625 **** * Initializes the parser with the given input HTML String. * @param inputHTML the input HTML that is to be parsed. ! * @throws ParserException If a error occurs in setting up the underlying Lexer. */ public void setInputHTML (String inputHTML) --- 667,672 ---- * Initializes the parser with the given input HTML String. * @param inputHTML the input HTML that is to be parsed. ! * @throws ParserException If a error occurs in setting up the ! * underlying Lexer. */ public void setInputHTML (String inputHTML) *************** *** 642,650 **** * returned <code>true</code>. */ ! public NodeList extractAllNodesThatMatch (NodeFilter filter) throws ParserException { NodeIterator e; NodeList ret; ! ret = new NodeList (); for (e = elements (); e.hasMoreNodes (); ) --- 689,699 ---- * returned <code>true</code>. */ ! public NodeList extractAllNodesThatMatch (NodeFilter filter) ! throws ! ParserException { NodeIterator e; NodeList ret; ! ret = new NodeList (); for (e = elements (); e.hasMoreNodes (); ) *************** *** 656,664 **** /** * Convenience method to extract all nodes of a given class type. ! * Equivalent to <code>extractAllNodesThatMatch (new NodeClassFilter (nodeType))</code>. * @param nodeType The class of the nodes to collect. * @throws ParserException If a parse error occurs. * @return A list of nodes which have the class specified. ! * @deprecated Use extractAllNodesThatMatch (new NodeClassFilter (nodeType)). * @see #extractAllNodesThatAre */ --- 705,714 ---- /** * Convenience method to extract all nodes of a given class type. ! * Equivalent to ! * <code>extractAllNodesThatMatch (new NodeClassFilter (nodeType))</code>. * @param nodeType The class of the nodes to collect. * @throws ParserException If a parse error occurs. * @return A list of nodes which have the class specified. ! * @deprecated Use extractAllNodesThatMatch (new NodeClassFilter (cls)). 
* @see #extractAllNodesThatAre */ *************** *** 669,673 **** NodeList ret; ! ret = extractAllNodesThatMatch (new NodeClassFilter (nodeType)); return (ret.toNodeArray ()); --- 719,723 ---- NodeList ret; ! ret = extractAllNodesThatMatch (new NodeClassFilter (nodeType)); return (ret.toNodeArray ()); *************** *** 690,695 **** ParserException { ! if (null != getFeedback ()) ! getFeedback ().info (ConnectionManager.getRequestHeader (connection)); } --- 740,744 ---- ParserException { ! getFeedback ().info (ConnectionManager.getRequestHeader (connection)); } *************** *** 706,711 **** ParserException { ! if (null != getFeedback ()) ! getFeedback ().info (ConnectionManager.getResponseHeader (connection)); } --- 755,759 ---- ParserException { ! getFeedback ().info (ConnectionManager.getResponseHeader (connection)); } *************** *** 724,738 **** System.out.println ("HTML Parser v" + VERSION_STRING + "\n"); System.out.println (); ! System.out.println ("Syntax : java -jar htmlparser.jar <resourceLocn/website> [node_type]"); ! System.out.println (" <resourceLocn/website> the URL or file to be parsed"); ! System.out.println (" node_type an optional node name, for example:"); ! System.out.println (" A - Show only the link tags extracted from the document"); ! System.out.println (" IMG - Show only the image tags extracted from the document"); ! System.out.println (" TITLE - Extract the title from the document"); ! System.out.println (); ! System.out.println ("Example : java -jar htmlparser.jar http://www.yahoo.com"); System.out.println (); ! System.out.println ("For support, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page..."); ! System.out.println ("HTML Parser home page : http://htmlparser.org"); System.out.println (); } --- 772,785 ---- System.out.println ("HTML Parser v" + VERSION_STRING + "\n"); System.out.println (); ! System.out.println ("Syntax : java -jar htmlparser.jar" ! + " <file/page> [type]"); ! 
System.out.println (" <file/page> the URL or file to be parsed"); ! System.out.println (" type the node type, for example:"); ! System.out.println (" A - Show only the link tags"); ! System.out.println (" IMG - Show only the image tags"); ! System.out.println (" TITLE - Show only the title tag"); System.out.println (); ! System.out.println ("Example : java -jar htmlparser.jar" ! + " http://www.yahoo.com"); System.out.println (); } *************** *** 746,750 **** { // for a simple dump, use more verbose settings filter = null; ! parser.setFeedback (Parser.stdout); getConnectionManager ().setMonitor (parser); } --- 793,797 ---- { // for a simple dump, use more verbose settings filter = null; ! parser.setFeedback (Parser.STDOUT); getConnectionManager ().setMonitor (parser); } Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** Node.java 10 Apr 2005 23:20:42 -0000 1.52 --- Node.java 24 Apr 2005 17:48:27 -0000 1.53 *************** *** 37,41 **** * to define your own nodes to be returned by the * {@link org.htmlparser.lexer.Lexer} or {@link Parser}, but each of the types ! * must support this interface. * More specific interface requirements for each of the node types are specified * by the {@link Text}, {@link Remark} and {@link Tag} interfaces. --- 37,41 ---- * to define your own nodes to be returned by the * {@link org.htmlparser.lexer.Lexer} or {@link Parser}, but each of the types ! * must support this interface. * More specific interface requirements for each of the node types are specified * by the {@link Text}, {@link Remark} and {@link Tag} interfaces. *************** *** 65,69 **** * @return The text of this node including it's children. */ ! public abstract String toPlainTextString (); /** --- 65,69 ---- * @return The text of this node including it's children. */ ! 
String toPlainTextString (); /** *************** *** 71,75 **** * This should be the exact sequence of characters that were encountered by * the parser that caused this node to be created. Where this breaks down is ! * where broken nodes (tags and remarks) have been encountered and fixed. * Applications reproducing html can use this method on nodes which are to * be used or transferred as they were received or created. --- 71,75 ---- * This should be the exact sequence of characters that were encountered by * the parser that caused this node to be created. Where this breaks down is ! * where broken nodes (tags and remarks) have been encountered and fixed. * Applications reproducing html can use this method on nodes which are to * be used or transferred as they were received or created. *************** *** 77,86 **** * to be returned by the parser or lexer. */ ! public abstract String toHtml (); /** * Return the string representation of the node. * The return value may not be the entire contents of the node, and non- ! * printable characters may be translated in order to make them visible. * This is typically to be used in * the manner<br> --- 77,86 ---- * to be returned by the parser or lexer. */ ! String toHtml (); /** * Return the string representation of the node. * The return value may not be the entire contents of the node, and non- ! * printable characters may be translated in order to make them visible. * This is typically to be used in * the manner<br> *************** *** 92,110 **** * that isn't too large. */ ! public abstract String toString (); /** ! * Collect this node and its child nodes (if applicable) into a list, provided the node ! * satisfies the filtering criteria.<P> ! * ! * This mechanism allows powerful filtering code to be written very easily, ! * without bothering about collection of embedded tags separately. * e.g. 
when we try to get all the links on a page, it is not possible to * get it at the top-level, as many tags (like form tags), can contain * links embedded in them. We could get the links out by checking if the * current node is a {@link org.htmlparser.tags.CompositeTag}, and going ! * through its children. So this method provides a convenient way to do this.<P> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to * extract all links from a page would look like: * <pre> --- 92,109 ---- * that isn't too large. */ ! String toString (); /** ! * Collect this node and its child nodes into a list, provided the node ! * satisfies the filtering criteria. ! * <p>This mechanism allows powerful filtering code to be written very ! * easily, without bothering about collection of embedded tags separately. * e.g. when we try to get all the links on a page, it is not possible to * get it at the top-level, as many tags (like form tags), can contain * links embedded in them. We could get the links out by checking if the * current node is a {@link org.htmlparser.tags.CompositeTag}, and going ! * through its children. So this method provides a convenient way to do ! * this.</p> ! * <p>Using collectInto(), programs get a lot shorter. Now, the code to * extract all links from a page would look like: * <pre> *************** *** 115,121 **** * </pre> * Thus, <code>list</code> will hold all the link nodes, irrespective of how ! * deep the links are embedded.<P> ! * ! * Another way to accomplish the same objective is: * <pre> * NodeList list = new NodeList (); --- 114,119 ---- * </pre> * Thus, <code>list</code> will hold all the link nodes, irrespective of how ! * deep the links are embedded.</p> ! * <p>Another way to accomplish the same objective is: * <pre> * NodeList list = new NodeList (); *************** *** 128,134 **** * @param list The list to collect nodes into. * @param filter The criteria to use when deciding if a node should ! * be added to the list. */ ! 
public abstract void collectInto (NodeList list, NodeFilter filter); /** --- 126,132 ---- * @param list The list to collect nodes into. * @param filter The criteria to use when deciding if a node should ! * be added to the list.</p> */ ! void collectInto (NodeList list, NodeFilter filter); /** *************** *** 137,141 **** * @return The start position. */ ! public abstract int getStartPosition (); /** --- 135,139 ---- * @return The start position. */ ! int getStartPosition (); /** *************** *** 143,147 **** * @param position The new start position. */ ! public abstract void setStartPosition (int position); /** --- 141,145 ---- * @param position The new start position. */ ! void setStartPosition (int position); /** *************** *** 151,155 **** * @return The end position. */ ! public abstract int getEndPosition (); /** --- 149,153 ---- * @return The end position. */ ! int getEndPosition (); /** *************** *** 157,161 **** * @param position The new end position. */ ! public abstract void setEndPosition (int position); /** --- 155,159 ---- * @param position The new end position. */ ! void setEndPosition (int position); /** *************** *** 163,167 **** * @return The page that supplied this node. */ ! public Page getPage (); /** --- 161,165 ---- * @return The page that supplied this node. */ ! Page getPage (); /** *************** *** 169,178 **** * @param page The page that supplied this node. */ ! public void setPage (Page page); /** * Apply the visitor to this node. * @param visitor The visitor to this node. */ ! public abstract void accept (NodeVisitor visitor); /** --- 167,177 ---- * @param page The page that supplied this node. */ ! void setPage (Page page); ! /** * Apply the visitor to this node. * @param visitor The visitor to this node. */ ! void accept (NodeVisitor visitor); /** *************** *** 186,190 **** * otherwise. */ ! public abstract Node getParent (); /** --- 185,189 ---- * otherwise. */ ! 
Node getParent (); /** *************** *** 192,196 **** * @param node The node that contains this node. */ ! public abstract void setParent (Node node); /** --- 191,195 ---- * @param node The node that contains this node. */ ! void setParent (Node node); /** *************** *** 199,203 **** * <code>null</code> otherwise. */ ! public abstract NodeList getChildren (); /** --- 198,202 ---- * <code>null</code> otherwise. */ ! NodeList getChildren (); /** *************** *** 205,209 **** * @param children The new list of children this node contains. */ ! public abstract void setChildren (NodeList children); /** --- 204,208 ---- * @param children The new list of children this node contains. */ ! void setChildren (NodeList children); /** *************** *** 212,216 **** * a tag, the contents of the tag less the enclosing angle brackets. */ ! public String getText (); /** --- 211,215 ---- * a tag, the contents of the tag less the enclosing angle brackets. */ ! String getText (); /** *************** *** 218,222 **** * @param text The new text for the node. */ ! public void setText (String text); /** --- 217,221 ---- * @param text The new text for the node. */ ! void setText (String text); /** *************** *** 231,236 **** * composite nodes (those that contain other nodes), the children will have * already been parsed and will be available via {@link #getChildren}. */ ! public void doSemanticAction () throws ParserException; --- 230,237 ---- * composite nodes (those that contain other nodes), the children will have * already been parsed and will be available via {@link #getChildren}. + * @exception ParserException If a problem is encountered performing the + * semantic action. */ ! void doSemanticAction () throws ParserException; *************** *** 242,247 **** /** * Allow cloning of nodes. ! * Creates and returns a copy of this object. The precise meaning ! * of "copy" may depend on the class of the object. 
The general * intent is that, for any object <tt>x</tt>, the expression: * <blockquote> --- 243,248 ---- /** * Allow cloning of nodes. ! * Creates and returns a copy of this object. The precise meaning ! * of "copy" may depend on the class of the object. The general * intent is that, for any object <tt>x</tt>, the expression: * <blockquote> *************** *** 252,261 **** * <pre> * x.clone().getClass() == x.getClass()</pre></blockquote> ! * will be <tt>true</tt>, but these are not absolute requirements. * While it is typically the case that: * <blockquote> * <pre> * x.clone().equals(x)</pre></blockquote> ! * will be <tt>true</tt>, this is not an absolute requirement. * <p> * By convention, the returned object should be obtained by calling --- 253,262 ---- * <pre> * x.clone().getClass() == x.getClass()</pre></blockquote> ! * will be <tt>true</tt>, but these are not absolute requirements. * While it is typically the case that: * <blockquote> * <pre> * x.clone().equals(x)</pre></blockquote> ! * will be <tt>true</tt>, this is not an absolute requirement. * <p> * By convention, the returned object should be obtained by calling *************** *** 275,291 **** * need to be modified. * <p> ! * The method <tt>clone</tt> for class <tt>Object</tt> performs a ! * specific cloning operation. First, if the class of this object does ! * not implement the interface <tt>Cloneable</tt>, then a ! * <tt>CloneNotSupportedException</tt> is thrown. Note that all arrays ! * are considered to implement the interface <tt>Cloneable</tt>. ! * Otherwise, this method creates a new instance of the class of this ! * object and initializes all its fields with exactly the contents of * the corresponding fields of this object, as if by assignment; the ! * contents of the fields are not themselves cloned. Thus, this method * performs a "shallow copy" of this object, not a "deep copy" operation. * <p> ! * The class <tt>Object</tt> does not itself implement the interface ! 
* <tt>Cloneable</tt>, so calling the <tt>clone</tt> method on an object * whose class is <tt>Object</tt> will result in throwing an * exception at run time. --- 276,292 ---- * need to be modified. * <p> ! * The method <tt>clone</tt> for class <tt>Object</tt> performs a ! * specific cloning operation. First, if the class of this object does ! * not implement the interface <tt>Cloneable</tt>, then a ! * <tt>CloneNotSupportedException</tt> is thrown. Note that all arrays ! * are considered to implement the interface <tt>Cloneable</tt>. ! * Otherwise, this method creates a new instance of the class of this ! * object and initializes all its fields with exactly the contents of * the corresponding fields of this object, as if by assignment; the ! * contents of the fields are not themselves cloned. Thus, this method * performs a "shallow copy" of this object, not a "deep copy" operation. * <p> ! * The class <tt>Object</tt> does not itself implement the interface ! * <tt>Cloneable</tt>, so calling the <tt>clone</tt> method on an object * whose class is <tt>Object</tt> will result in throwing an * exception at run time. *************** *** 299,303 **** * @see java.lang.Cloneable */ ! public Object clone () throws CloneNotSupportedException; --- 300,304 ---- * @see java.lang.Cloneable */ ! 
Object clone () throws CloneNotSupportedException; Index: PrototypicalNodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/PrototypicalNodeFactory.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** PrototypicalNodeFactory.java 10 Apr 2005 23:20:42 -0000 1.14 --- PrototypicalNodeFactory.java 24 Apr 2005 17:48:27 -0000 1.15 *************** *** 34,42 **** import java.util.Vector; - import org.htmlparser.Attribute; - import org.htmlparser.NodeFactory; - import org.htmlparser.Remark; - import org.htmlparser.Tag; - import org.htmlparser.Text; import org.htmlparser.lexer.Page; import org.htmlparser.nodes.TextNode; --- 34,37 ---- *************** *** 73,77 **** import org.htmlparser.tags.TextareaTag; import org.htmlparser.tags.TitleTag; - import org.htmlparser.util.ParserException; /** --- 68,71 ---- *************** *** 98,102 **** * explicitly.</p> * <p>Here is an example of how to override all text issued from ! * {@link org.htmlparser.nodes.TextNode#toPlainTextString() Text.toPlainTextString()}, * in this case decoding (converting character references), * which illustrates the use of setting the text prototype: --- 92,97 ---- * explicitly.</p> * <p>Here is an example of how to override all text issued from ! * {@link org.htmlparser.nodes.TextNode#toPlainTextString() ! * Text.toPlainTextString()}, * in this case decoding (converting character references), * which illustrates the use of setting the text prototype: *************** *** 108,112 **** * public String toPlainTextString() * { ! * return (org.htmlparser.util.Translate.decode (super.toPlainTextString ())); * } * }); --- 103,108 ---- * public String toPlainTextString() * { ! * String original = super.toPlainTextString (); ! * return (org.htmlparser.util.Translate.decode (original)); * } * }); *************** *** 208,211 **** --- 204,209 ---- * Adds a tag to the registry. 
* @param id The name under which to register the tag. + * <strong>For proper operation, the id should be uppercase so it + * will be matched by a Map lookup.</strong> * @param tag The tag to be returned from a {@link #createTagNode} call. * @return The tag previously registered with that id if any, *************** *** 259,272 **** * Register a tag. * Registers the given tag under every {@link Tag#getIds() id} that the ! * tag has. * @param tag The tag to register. */ public void registerTag (Tag tag) { ! String ids[]; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! put (ids[i], tag); } --- 257,272 ---- * Register a tag. * Registers the given tag under every {@link Tag#getIds() id} that the ! * tag has (i.e. all names returned by {@link Tag#getIds() tag.getIds()}. ! * <p><strong>For proper operation, the ids are converted to uppercase so ! * they will be matched by a Map lookup.</strong> * @param tag The tag to register. */ public void registerTag (Tag tag) { ! String[] ids; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! put (ids[i].toUpperCase (Locale.ENGLISH), tag); } *************** *** 274,286 **** * Unregister a tag. * Unregisters the given tag from every {@link Tag#getIds() id} the tag has. * @param tag The tag to unregister. */ public void unregisterTag (Tag tag) { ! String ids[]; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! remove (ids[i]); } --- 274,288 ---- * Unregister a tag. * Unregisters the given tag from every {@link Tag#getIds() id} the tag has. + * <p><strong>The ids are converted to uppercase to undo the operation + * of registerTag.</strong> * @param tag The tag to unregister. */ public void unregisterTag (Tag tag) { ! String[] ids; ! ids = tag.getIds (); for (int i = 0; i < ids.length; i++) ! remove (ids[i].toUpperCase (Locale.ENGLISH)); } *************** *** 323,327 **** registerTag (new HeadTag ()); registerTag (new Html ()); ! 
return (this); } --- 325,329 ---- registerTag (new HeadTag ()); registerTag (new Html ()); ! return (this); } *************** *** 440,444 **** { Remark ret; ! try { --- 442,446 ---- { Remark ret; ! try { *************** *** 503,507 **** catch (CloneNotSupportedException cnse) { ! // default to creating a new one } } --- 505,509 ---- catch (CloneNotSupportedException cnse) { ! // default to creating a generic one } } |