Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser Node.java,1.50,1.51 PrototypicalNodeFactory.java,1.10
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-07-02 00:50:06
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32670/src/org/htmlparser Modified Files: Node.java PrototypicalNodeFactory.java Tag.java Log Message: Part four of a multiphase refactoring. Most internals now use the Tag interface. This interface has been broadened to add set/get scanner and set/get endtag. Removed the org.htmlparser.tags.Tag class and moved the remaining (minor) functionality to the TagNode class. So now tags inherit directly from TagNode or CompositeTag. ** NOTE: If you have subclassed org.htmlparser.tags.Tag, use org.htmlparser.nodes.TagNode now.** Removed deprecated methods getTagBegin/getTagEnd and deleted unused classes: PeekingIterator and its Implementation. Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.50 retrieving revision 1.51 diff -C2 -d -r1.50 -r1.51 *** Node.java 14 Jun 2004 00:06:51 -0000 1.50 --- Node.java 2 Jul 2004 00:49:26 -0000 1.51 *************** *** 27,30 **** --- 27,31 ---- package org.htmlparser; + import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; *************** *** 171,174 **** --- 172,186 ---- /** + * Get the page this node came from. + * @return The page that supplied this node. + */ + public Page getPage (); + + /** + * Set the page this node came from. + * @param page The page that supplied this node. + */ + public void setPage (Page page); + /** * Apply the visitor to this node. * @param visitor The visitor to this node. 
Index: PrototypicalNodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/PrototypicalNodeFactory.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** PrototypicalNodeFactory.java 26 Jun 2004 11:25:01 -0000 1.10 --- PrototypicalNodeFactory.java 2 Jul 2004 00:49:26 -0000 1.11 *************** *** 43,46 **** --- 43,47 ---- import org.htmlparser.nodes.TextNode; import org.htmlparser.nodes.RemarkNode; + import org.htmlparser.nodes.TagNode; import org.htmlparser.tags.AppletTag; import org.htmlparser.tags.BaseHrefTag; *************** *** 106,109 **** --- 107,115 ---- /** + * The prototypical tag node. + */ + protected Tag mTag; + + /** * The list of tags to return. * The list is keyed by tag name. *************** *** 129,132 **** --- 135,139 ---- mText = new TextNode (null, 0, 0); mRemark = new RemarkNode (null, 0, 0); + mTag = new TagNode (null, 0, 0, null); if (!empty) registerTags (); *************** *** 137,141 **** * @param tag The single tag to register in the otherwise empty factory. */ ! public PrototypicalNodeFactory (org.htmlparser.tags.Tag tag) { this (true); --- 144,148 ---- * @param tag The single tag to register in the otherwise empty factory. */ ! public PrototypicalNodeFactory (Tag tag) { this (true); *************** *** 147,151 **** * @param tags The tags to register in the otherwise empty factory. */ ! public PrototypicalNodeFactory (org.htmlparser.tags.Tag[] tags) { this (true); --- 154,158 ---- * @param tags The tags to register in the otherwise empty factory. */ ! public PrototypicalNodeFactory (Tag[] tags) { this (true); *************** *** 207,213 **** * Registers the given tag under every id the tag has. * @param tag The tag to register (subclass of ! * {@link org.htmlparser.tags.Tag}). */ ! 
public void registerTag (org.htmlparser.tags.Tag tag) { String ids[]; --- 214,220 ---- * Registers the given tag under every id the tag has. * @param tag The tag to register (subclass of ! * {@link Tag}). */ ! public void registerTag (Tag tag) { String ids[]; *************** *** 222,228 **** * Unregisters the given tag from every id the tag has. * @param tag The tag to unregister (subclass of ! * {@link org.htmlparser.tags.Tag}). */ ! public void unregisterTag (org.htmlparser.tags.Tag tag) { String ids[]; --- 229,235 ---- * Unregisters the given tag from every id the tag has. * @param tag The tag to unregister (subclass of ! * {@link Tag}). */ ! public void unregisterTag (Tag tag) { String ids[]; *************** *** 234,260 **** /** - * Register a tag. - * Registers the given tag under the tag {@link Tag#getTagName() name}. - * @param tag The tag to register (implements {@link org.htmlparser.Tag}). - */ - public void registerTag (Tag tag) - { - put (tag.getTagName (), tag); - } - - /** - * Unregister a tag. - * Unregisters the given tag from the tag {@link Tag#getTagName() name}. - * @param tag The tag to unregister (implements {@link org.htmlparser.Tag}). - */ - public void unregisterTag (Tag tag) - { - remove (tag.getTagName ()); - } - - /** * Register all known tags in the tag package. * Registers tags from the {@link org.htmlparser.tags tag package} by ! * calling {@link #registerTag(org.htmlparser.tags.Tag) registerTag()}. * @return 'this' nodefactory as a convenience. */ --- 241,247 ---- /** * Register all known tags in the tag package. * Registers tags from the {@link org.htmlparser.tags tag package} by ! * calling {@link #registerTag(Tag) registerTag()}. * @return 'this' nodefactory as a convenience. */ *************** *** 337,340 **** --- 324,352 ---- } + /** + * Get the object being used to generate generic tag nodes. + * These are returned from {@link createTagNode} when no specific tag + * is found in the registered tag list. 
+ * @return The prototype for {@link Tag} nodes. + */ + public Tag getTagPrototype () + { + return (mTag); + } + + /** + * Set the object to be used to generate tag nodes. + * These are returned from {@link createTagNode} when no specific tag + * is found in the registered tag list. + * @param remark The prototype for {@link Tag} nodes. + */ + public void setTagPrototype (Tag tag) + { + if (null == tag) + throw new IllegalArgumentException ("tag prototype node cannot be null"); + else + mTag = tag; + } + // // NodeFactory interface *************** *** 354,361 **** { ret = (Text)(getTextPrototype ().clone ()); ! if (ret instanceof AbstractNode) ! ((AbstractNode)ret).setPage (page); ! else ! ret.setText (page.getText (start, end)); ret.setStartPosition (start); ret.setEndPosition (end); --- 366,370 ---- { ret = (Text)(getTextPrototype ().clone ()); ! ret.setPage (page); ret.setStartPosition (start); ret.setEndPosition (end); *************** *** 384,398 **** { ret = (Remark)(getRemarkPrototype ().clone ()); ! if (ret instanceof AbstractNode) ! ((AbstractNode)ret).setPage (page); ! else ! { ! first = start + 4; // <!-- ! last = end - 3; // --> ! if (first >= last) ! ret.setText (""); ! else ! ret.setText (page.getText (first, last)); ! } ret.setStartPosition (start); ret.setEndPosition (end); --- 393,397 ---- { ret = (Remark)(getRemarkPrototype ().clone ()); ! ret.setPage (page); ret.setStartPosition (start); ret.setEndPosition (end); *************** *** 445,450 **** { ret = (Tag)prototype.clone (); ! if (ret instanceof AbstractNode) ! ((AbstractNode)ret).setPage (page); ret.setStartPosition (start); ret.setEndPosition (end); --- 444,448 ---- { ret = (Tag)prototype.clone (); ! ret.setPage (page); ret.setStartPosition (start); ret.setEndPosition (end); *************** *** 460,465 **** } if (null == ret) ! // generate a generic node ! ret = new org.htmlparser.tags.Tag (page, start, end, attributes); return (ret); --- 458,475 ---- } if (null == ret) ! 
{ // generate a generic node ! try ! { ! ret = (Tag)getTagPrototype ().clone (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! } ! catch (CloneNotSupportedException cnse) ! { ! ret = new TagNode (page, start, end, attributes); ! } ! } return (ret); Index: Tag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Tag.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Tag.java 24 May 2004 16:18:12 -0000 1.3 --- Tag.java 2 Jul 2004 00:49:26 -0000 1.4 *************** *** 27,32 **** --- 27,35 ---- package org.htmlparser; + import java.util.Hashtable; import java.util.Vector; + import org.htmlparser.scanners.Scanner; + /** * Identifies what a Tag such as <XXX xxx yyy="zzz"> can do. *************** *** 96,99 **** --- 99,128 ---- */ public void setAttributesEx (Vector attribs); + + /** + * Gets the attributes in the tag. + * This is not the preferred method to get attributes, see {@link + * #getAttributesEx getAttributesEx} which returns a list of {@link + * Attribute} objects, which offer more information than the simple + * <code>String</code> objects available from this <code>Hashtable</code>. + * @return Returns a list of name/value pairs representing the attributes. + * These are not in order, the keys (names) are converted to uppercase and the values + * are not quoted, even if they need to be. The table <em>will</em> return + * <code>null</code> if there was no value for an attribute (no equals + * sign or nothing to the right of the equals sign). A special entry with + * a key of SpecialHashtable.TAGNAME ("$<TAGNAME>$") holds the tag name. + * The conversion to uppercase is performed with an ENGLISH locale. + * @deprecated Use getAttributesEx() instead. + */ + public Hashtable getAttributes (); + + /** + * Sets the attributes. 
+ * A special entry with a key of SpecialHashtable.TAGNAME ("$<TAGNAME>$") + * sets the tag name. + * @param attributes The attribute collection to set. + * @deprecated Use setAttributesEx() instead. + */ + public void setAttributes (Hashtable attributes); /** *************** *** 119,122 **** --- 148,158 ---- /** + * Return the name of this tag. + * @return The tag name or null if this tag contains nothing or only + * whitespace. + */ + public String getRawTagName (); + + /** * Determines if the given tag breaks the flow of text. * @return <code>true</code> if following text would start on a new line, *************** *** 152,154 **** --- 188,254 ---- */ public void setEmptyXmlTag (boolean emptyXmlTag); + + /** + * Return the set of names handled by this tag. + * Since this a a generic tag, it has no ids. + * @return The names to be matched that create tags of this type. + */ + public String[] getIds (); + + /** + * Return the set of tag names that cause this tag to finish. + * These are the normal (non end tags) that if encountered while + * scanning (a composite tag) will cause the generation of a virtual + * tag. + * Since this a a non-composite tag, the default is no enders. + * @return The names of following tags that stop further scanning. + */ + public String[] getEnders (); + + /** + * Return the set of end tag names that cause this tag to finish. + * These are the end tags that if encountered while + * scanning (a composite tag) will cause the generation of a virtual + * tag. + * Since this a a non-composite tag, it has no end tag enders. + * @return The names of following end tags that stop further scanning. + */ + public String[] getEndTagEnders (); + + /** + * Get the end tag for this (composite) tag. + * For a non-composite tag this always returns <code>null</code>. + * @return The tag that terminates this composite tag, i.e. </HTML>. + */ + public Tag getEndTag (); + + /** + * Set the end tag for this (composite) tag. 
+ * For a non-composite tag this is a no-op. + * @param end The tag that terminates this composite tag, i.e. </HTML>. + */ + public void setEndTag (Tag end); + + /** + * Return the scanner associated with this tag. + * @return The scanner associated with this tag. + */ + public Scanner getThisScanner (); + + /** + * Set the scanner associated with this tag. + * @param scanner The scanner for this tag. + */ + public void setThisScanner (Scanner scanner); + + /** + * Get the line number where this tag starts. + * @return The (zero based) line number in the page where this tag starts. + */ + public int getStartingLineNumber (); + /** + * Get the line number where this tag ends. + * @return The (zero based) line number in the page where this tag ends. + */ + public int getEndingLineNumber (); } |