[Htmlparser-cvs] htmlparser/src/org/htmlparser/tags CompositeTag.java,1.66,1.67 FormTag.java,1.41,1.
Brought to you by:
derrickoswald
From: <der...@us...> - 2003-12-07 23:42:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv16537/tags Modified Files: CompositeTag.java FormTag.java ImageTag.java InputTag.java LabelTag.java LinkTag.java MetaTag.java SelectTag.java TableColumn.java TableRow.java TextareaTag.java Log Message: Remove most of the scanners. The only scanners left are ones that really do something different (script and jsp). Instead of registering a scanner to enable returning a specific tag, you now add a tag to a PrototypicalNodeFactory. All known tags are 'registered' by default in a new Parser, which is similar to having called the old 'registerDOMScanners()', so tags are fully nested. This is different behaviour, and specifically, you will need to recurse into returned nodes to get at what you want. I've tried to adjust the applications accordingly, but worked examples are still scarce. If you want to return only some of the derived tags while keeping most as generic tags, there are various constructors and manipulators on the factory. See the javadocs and examples in the tests package. Nearly all the old scanner tests are folded into the tag tests. toString() has been revamped. This means that the default Parser mainline now returns an indented listing of tags, making it easy to see the structure of a page. The downside is that the text of the page has to have newlines, tabs, etc. turned into escape sequences. But if you were really interested in content you would be using toHtml() or toPlainTextString().
Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.66 retrieving revision 1.67 diff -C2 -d -r1.66 -r1.67 *** CompositeTag.java 9 Nov 2003 17:07:11 -0000 1.66 --- CompositeTag.java 7 Dec 2003 23:41:41 -0000 1.67 *************** *** 434,436 **** --- 434,491 ---- return stringNode; } + + public String toString () + { + StringBuffer ret; + + ret = new StringBuffer (1024); + toString (0, ret); + + return (ret.toString ()); + } + + /** + * Return the text contained in this tag. + * @return The complete contents of the tag (within the angle brackets). + */ + public String getText () + { + String ret; + + ret = super.toHtml (); + ret = ret.substring (1, ret.length () - 1); + + return (ret); + } + + public void toString (int level, StringBuffer buffer) + { + Node node; + + for (int i = 0; i < level; i++) + buffer.append (" "); + buffer.append (super.toString ()); + buffer.append (System.getProperty ("line.separator")); + for (SimpleNodeIterator e = children (); e.hasMoreNodes ();) + { + node = e.nextNode (); + if (node instanceof CompositeTag) + ((CompositeTag)node).toString (level + 1, buffer); + else + { + for (int i = 0; i <= level; i++) + buffer.append (" "); + buffer.append (node); + buffer.append (System.getProperty ("line.separator")); + } + } + // eliminate virtual tags + // if (!(getEndTag ().getStartPosition () == getEndTag ().getEndPosition ())) + { + for (int i = 0; i <= level; i++) + buffer.append (" "); + buffer.append (getEndTag ().toString ()); + buffer.append (System.getProperty ("line.separator")); + } + } } Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** FormTag.java 9 Nov 2003 17:07:11 -0000 1.41 --- 
FormTag.java 7 Dec 2003 23:41:41 -0000 1.42 *************** *** 56,60 **** * The set of end tag names that indicate the end of this tag. */ ! private static final String[] mEndTagEnders = new String[] {"HTML", "BODY"}; /** --- 56,60 ---- * The set of end tag names that indicate the end of this tag. */ ! private static final String[] mEndTagEnders = new String[] {"HTML", "BODY", "TABLE"}; /** Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** ImageTag.java 9 Nov 2003 17:07:11 -0000 1.36 --- ImageTag.java 7 Dec 2003 23:41:41 -0000 1.37 *************** *** 188,196 **** } - public String toString() - { - return "IMAGE TAG : Image at " + getImageURL () +"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); - } - public void setImageURL (String url) { --- 188,191 ---- Index: InputTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/InputTag.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** InputTag.java 9 Nov 2003 17:07:11 -0000 1.31 --- InputTag.java 7 Dec 2003 23:41:41 -0000 1.32 *************** *** 56,62 **** return (mIds); } - - public String toString() { - return (ParserUtils.toString(this)); - } } --- 56,58 ---- Index: LabelTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LabelTag.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** LabelTag.java 9 Nov 2003 17:07:11 -0000 1.32 --- LabelTag.java 7 Dec 2003 23:41:41 -0000 1.33 *************** *** 43,47 **** /** ! * Create a new lavel tag. */ public LabelTag () --- 43,47 ---- /** ! * Create a new label tag. 
*/ public LabelTag () Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** LinkTag.java 9 Nov 2003 17:07:11 -0000 1.44 --- LinkTag.java 7 Dec 2003 23:41:41 -0000 1.45 *************** *** 31,35 **** import org.htmlparser.Node; - import org.htmlparser.scanners.LinkScanner; import org.htmlparser.util.ParserUtils; import org.htmlparser.util.SimpleNodeIterator; --- 31,34 ---- Index: MetaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/MetaTag.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** MetaTag.java 9 Nov 2003 17:07:11 -0000 1.32 --- MetaTag.java 7 Dec 2003 23:41:41 -0000 1.33 *************** *** 118,129 **** } } - - public String toString() - { - return "META TAG\n"+ - "--------\n"+ - "Http-Equiv : "+getHttpEquiv()+"\n"+ - "Name : "+ getMetaTagName() +"\n"+ - "Contents : "+getMetaContent()+"\n"; - } } --- 118,120 ---- Index: SelectTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/SelectTag.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** SelectTag.java 9 Nov 2003 17:07:11 -0000 1.33 --- SelectTag.java 7 Dec 2003 23:41:41 -0000 1.34 *************** *** 99,122 **** return (ret); } - - public String toString() - { - StringBuffer lString; - NodeList children; - Node node; - - lString = new StringBuffer(ParserUtils.toString(this)); - children = getChildren (); - for(int i=0;i<children.size(); i++) - { - node = children.elementAt(i); - if (node instanceof OptionTag) - { - OptionTag optionTag = (OptionTag)node; - lString.append(optionTag.toString()).append("\n"); - } - } - - return lString.toString(); - } } --- 99,101 ---- 
Index: TableColumn.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableColumn.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** TableColumn.java 9 Nov 2003 17:07:11 -0000 1.33 --- TableColumn.java 7 Dec 2003 23:41:41 -0000 1.34 *************** *** 40,43 **** --- 40,53 ---- /** + * The set of tag names that indicate the end of this tag. + */ + private static final String[] mEnders = new String[] {"TD", "TR"}; + + /** + * The set of end tag names that indicate the end of this tag. + */ + private static final String[] mEndTagEnders = new String[] {"TR", "TABLE"}; + + /** * Create a new table column tag. */ *************** *** 62,65 **** --- 72,84 ---- { return (mIds); + } + + /** + * Return the set of end tag names that cause this tag to finish. + * @return The names of following end tags that stop further scanning. + */ + public String[] getEndTagEnders () + { + return (mEndTagEnders); } } Index: TableRow.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableRow.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** TableRow.java 9 Nov 2003 17:07:11 -0000 1.35 --- TableRow.java 7 Dec 2003 23:41:41 -0000 1.36 *************** *** 42,45 **** --- 42,50 ---- /** + * The set of end tag names that indicate the end of this tag. + */ + private static final String[] mEndTagEnders = new String[] {"TABLE"}; + + /** * Create a new table row tag. */ *************** *** 64,67 **** --- 69,81 ---- { return (mIds); + } + + /** + * Return the set of end tag names that cause this tag to finish. + * @return The names of following end tags that stop further scanning. 
+ */ + public String[] getEndTagEnders () + { + return (mEndTagEnders); } Index: TextareaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TextareaTag.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** TextareaTag.java 9 Nov 2003 17:07:11 -0000 1.30 --- TextareaTag.java 7 Dec 2003 23:41:41 -0000 1.31 *************** *** 88,98 **** return toPlainTextString(); } - - public String toString() - { - StringBuffer buff = new StringBuffer(ParserUtils.toString(this)); - buff.append("VALUE : ").append(getValue()).append("\n"); - - return buff.toString(); - } } --- 88,90 ---- |