htmlparser-cvs Mailing List for HTML Parser (Page 31)
Brought to you by:
derrickoswald
You can subscribe to this list here.
| 2003 | Jan | Feb | Mar | Apr | May (141) | Jun (108) | Jul (66) | Aug (127) | Sep (155) | Oct (149) | Nov (72) | Dec (72) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 | Jan (100) | Feb (36) | Mar (21) | Apr (3) | May (87) | Jun (28) | Jul (84) | Aug (5) | Sep (14) | Oct | Nov | Dec |
| 2005 | Jan (1) | Feb (39) | Mar (26) | Apr (38) | May (14) | Jun (10) | Jul | Aug | Sep (13) | Oct (8) | Nov (10) | Dec |
| 2006 | Jan | Feb (1) | Mar (17) | Apr (20) | May (28) | Jun (24) | Jul | Aug | Sep | Oct | Nov | Dec |
| 2015 | Jan | Feb | Mar (1) | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
From: <der...@us...> - 2003-11-01 21:55:49
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv2656/tests/tagTests Modified Files: ScriptTagTest.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: ScriptTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ScriptTagTest.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** ScriptTagTest.java 1 Nov 2003 01:36:57 -0000 1.37 --- ScriptTagTest.java 1 Nov 2003 21:55:44 -0000 1.38 *************** *** 67,71 **** ScriptTag scriptTag = (ScriptTag)node[0]; assertEquals("Script Tag Begin",0,scriptTag.getStartPosition ()); ! assertEquals("Script Tag End",28,scriptTag.getEndPosition ()); assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); } --- 67,71 ---- ScriptTag scriptTag = (ScriptTag)node[0]; assertEquals("Script Tag Begin",0,scriptTag.getStartPosition ()); ! assertEquals("Script Tag End",28,scriptTag.getEndTag ().getEndPosition ()); assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); } |
From: <der...@us...> - 2003-11-01 21:55:49
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1:/tmp/cvs-serv2656/visitors Modified Files: UrlModifyingVisitor.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: UrlModifyingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/UrlModifyingVisitor.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** UrlModifyingVisitor.java 29 Oct 2003 03:31:18 -0000 1.36 --- UrlModifyingVisitor.java 1 Nov 2003 21:55:44 -0000 1.37 *************** *** 44,48 **** public UrlModifyingVisitor(Parser parser, String linkPrefix) { ! super(true,false); this.parser = parser; parser.addScanner(new LinkScanner()); --- 44,48 ---- public UrlModifyingVisitor(Parser parser, String linkPrefix) { ! super(true,true); this.parser = parser; parser.addScanner(new LinkScanner()); *************** *** 66,75 **** public void visitTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); } public void visitEndTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); } --- 66,77 ---- public void visitTag(Tag tag) { ! if (null == tag.getParent ()) ! modifiedResult.append(tag.toHtml()); } public void visitEndTag(Tag tag) { ! if (null == tag.getParent ()) ! modifiedResult.append(tag.toHtml()); } |
From: <der...@us...> - 2003-11-01 21:55:47
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv2656/tests/scannersTests Modified Files: CompositeTagScannerTest.java FormScannerTest.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.50 retrieving revision 1.51 diff -C2 -d -r1.50 -r1.51 *** CompositeTagScannerTest.java 1 Nov 2003 04:03:21 -0000 1.50 --- CompositeTagScannerTest.java 1 Nov 2003 21:55:43 -0000 1.51 *************** *** 30,33 **** --- 30,34 ---- import java.util.Vector; + import org.htmlparser.AbstractNode; import org.htmlparser.Node; import org.htmlparser.StringNode; *************** *** 135,139 **** assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("custom tag starting loc",0,customTag.getStartPosition ()); ! assertEquals("custom tag ending loc",24,customTag.getEndPosition ()); Node child = customTag.childAt(0); --- 136,140 ---- assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("custom tag starting loc",0,customTag.getStartPosition ()); ! assertEquals("custom tag ending loc",24,customTag.getEndTag ().getEndPosition ()); Node child = customTag.childAt(0); *************** *** 156,160 **** assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("custom tag starting loc",0,customTag.getStartPosition ()); ! 
assertEquals("custom tag ending loc",27,customTag.getEndPosition ()); Node child = customTag.childAt(0); --- 157,161 ---- assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("custom tag starting loc",0,customTag.getStartPosition ()); ! assertEquals("custom tag ending loc",27,customTag.getEndTag ().getEndPosition ()); Node child = customTag.childAt(0); *************** *** 291,295 **** assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); ! assertEquals("ending line position",1,customTag.getEndingLineNumber()); assertStringEquals("html", html + "</custom>", customTag.toHtml() ); --- 292,296 ---- assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); ! assertEquals("ending line position",1,customTag.getEndTag ().getEndingLineNumber()); assertStringEquals("html", html + "</custom>", customTag.toHtml() ); *************** *** 307,313 **** assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); ! assertEquals("ending loc of custom tag",17,customTag.getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); ! assertEquals("ending line position",0,customTag.getEndingLineNumber()); assertStringEquals("1st custom tag", tag1 + "</custom>", customTag.toHtml()); customTag = (CustomTag)node[1]; --- 308,314 ---- assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); ! assertEquals("ending loc of custom tag",17,customTag.getEndTag ().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); ! 
assertEquals("ending line position",0,customTag.getEndTag ().getEndingLineNumber()); assertStringEquals("1st custom tag", tag1 + "</custom>", customTag.toHtml()); customTag = (CustomTag)node[1]; *************** *** 334,338 **** assertEquals("ending loc",17,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); ! assertEquals("ending line position",1,customTag.getEndingLineNumber()); assertStringEquals("another tag html",another + "</another>",anotherTag.toHtml()); assertStringEquals("custom tag html",custom,customTag.toHtml()); --- 335,339 ---- assertEquals("ending loc",17,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); ! assertEquals("ending line position",1,customTag.getEndTag ().getEndingLineNumber()); assertStringEquals("another tag html",another + "</another>",anotherTag.toHtml()); assertStringEquals("custom tag html",custom,customTag.toHtml()); *************** *** 406,410 **** AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); assertEquals("anotherTag child count",2,anotherTag.getChildCount()); ! assertEquals("anotherTag end loc",27,anotherTag.getEndPosition ()); assertEquals("custom end tag begin loc",27,customTag.getEndTag().getStartPosition ()); assertEquals("custom end tag end loc",36,customTag.getEndTag().getEndPosition ()); --- 407,411 ---- AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); assertEquals("anotherTag child count",2,anotherTag.getChildCount()); ! assertEquals("anotherTag end loc",27,anotherTag.getEndTag ().getEndPosition ()); assertEquals("custom end tag begin loc",27,customTag.getEndTag().getStartPosition ()); assertEquals("custom end tag end loc",36,customTag.getEndTag().getEndPosition ()); *************** *** 498,510 **** createParser("<Custom/>","http://www.yahoo.com"); ! parser.addScanner(new CustomScanner() { ! 
public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! { ! url = page.getUrl (); ! return (super.createTag (page, start, end, attributes, startTag, endTag, children)); ! } ! }); parseAndAssertNodeCount(1); ! assertStringEquals("url","http://www.yahoo.com",url); } --- 499,516 ---- createParser("<Custom/>","http://www.yahoo.com"); ! parser.addScanner(new CustomScanner() ! // { ! // public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException ! // { ! // if (null != page) ! // url = page.getUrl (); ! // else ! // url = null; ! // return (super.createTag (page, start, end, attributes, startTag, endTag, children)); ! // } ! // } ! ); parseAndAssertNodeCount(1); ! assertStringEquals("url","http://www.yahoo.com",((AbstractNode)node[0]).getPage ().getUrl ()); } *************** *** 565,569 **** } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException { CustomTag ret; --- 571,575 ---- } ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException { CustomTag ret; *************** *** 596,600 **** } ! public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException { AnotherTag ret; --- 602,606 ---- } ! 
public Tag createTag (Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException { AnotherTag ret; Index: FormScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/FormScannerTest.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** FormScannerTest.java 27 Oct 2003 02:18:05 -0000 1.39 --- FormScannerTest.java 1 Nov 2003 21:55:43 -0000 1.40 *************** *** 125,133 **** assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! String expected = ! FORM_HTML.substring (0, FORM_HTML.indexOf ("\"do_login.php\"")) ! + "\"http://www.google.com/test/do_login.php\"" ! + FORM_HTML.substring (FORM_HTML.indexOf ("\"do_login.php\"") + 14); ! assertStringEquals("toHTML",expected,formTag.toHtml()); } --- 125,129 ---- assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! assertStringEquals("toHTML",FORM_HTML,formTag.toHtml()); } |
From: <der...@us...> - 2003-11-01 21:55:47
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv2656/tests Modified Files: FunctionalTests.java LineNumberAssignedByNodeReaderTest.java ParserTestCase.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: FunctionalTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/FunctionalTests.java,v retrieving revision 1.48 retrieving revision 1.49 diff -C2 -d -r1.48 -r1.49 *** FunctionalTests.java 29 Oct 2003 03:31:18 -0000 1.48 --- FunctionalTests.java 1 Nov 2003 21:55:43 -0000 1.49 *************** *** 33,36 **** --- 33,37 ---- import java.io.InputStream; import java.io.InputStreamReader; + import java.io.Reader; import java.net.MalformedURLException; import java.net.URL; *************** *** 67,96 **** // First count the image tags as is int imgTagCount; ! imgTagCount = findImageTagCount(); ! try { ! int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); ! } ! catch (ParserException e) { ! throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); ! } ! } ! public int findImageTagCount() { int imgTagCount = 0; ! try { ! URL url = new URL("http://education.yahoo.com/"); ! InputStream is = url.openStream(); ! BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); ! imgTagCount = countImageTagsWithoutHTMLParser(reader); ! is.close(); ! } ! catch (MalformedURLException e) { ! System.err.println("URL was malformed!"); } ! catch (IOException e) { ! 
System.err.println("IO Exception occurred while trying to open stream"); } return imgTagCount; --- 68,86 ---- // First count the image tags as is int imgTagCount; ! int parserImgTagCount = countImageTagsWithHTMLParser(); ! imgTagCount = findImageTagCount(getParser ()); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); } ! public int findImageTagCount(Parser parser) { int imgTagCount = 0; ! parser.reset (); ! try ! { ! imgTagCount = countImageTagsWithoutHTMLParser(parser); } ! catch (IOException e) ! { ! System.err.println ("IO Exception occurred while counting tags"); } return imgTagCount; *************** *** 100,103 **** --- 90,94 ---- Parser parser = new Parser("http://education.yahoo.com/",new DefaultParserFeedback()); parser.addScanner(new ImageScanner("-i")); + setParser (parser); int parserImgTagCount = 0; Node node; *************** *** 111,119 **** } ! public int countImageTagsWithoutHTMLParser(BufferedReader reader) throws IOException { String line; ! int imgTagCount = 0; do { ! line = reader.readLine(); if (line!=null) { // Check the line for image tags --- 102,115 ---- } ! public int countImageTagsWithoutHTMLParser (Parser parser) throws IOException ! { ! BufferedReader lines; String line; ! int imgTagCount; ! ! imgTagCount = 0; ! lines = new BufferedReader (parser.getLexer ().getPage ().getSource ()); do { ! line = lines.readLine(); if (line!=null) { // Check the line for image tags Index: LineNumberAssignedByNodeReaderTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/LineNumberAssignedByNodeReaderTest.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** LineNumberAssignedByNodeReaderTest.java 26 Oct 2003 19:46:25 -0000 1.26 --- LineNumberAssignedByNodeReaderTest.java 1 Nov 2003 21:55:43 -0000 1.27 *************** *** 61,70 **** * @throws ParserException if there is a problem parsing the test data */ ! 
public void testLineNumbers() throws ParserException { testLineNumber("<Custom/>", 1, 0, 0, 0); testLineNumber("<Custom />", 1, 0, 0, 0); testLineNumber("<Custom></Custom>", 1, 0, 0, 0); testLineNumber("<Custom>Content</Custom>", 1, 0, 0, 0); testLineNumber("<Custom>Content<Custom></Custom>", 1, 0, 0, 0); testLineNumber( "<Custom>\n" + --- 61,91 ---- * @throws ParserException if there is a problem parsing the test data */ ! public void testLineNumbers1() throws ParserException ! { testLineNumber("<Custom/>", 1, 0, 0, 0); + } + + public void testLineNumbers2() throws ParserException + { testLineNumber("<Custom />", 1, 0, 0, 0); + } + + public void testLineNumbers3() throws ParserException + { testLineNumber("<Custom></Custom>", 1, 0, 0, 0); + } + + public void testLineNumbers4() throws ParserException + { testLineNumber("<Custom>Content</Custom>", 1, 0, 0, 0); + } + + public void testLineNumbers5() throws ParserException + { testLineNumber("<Custom>Content<Custom></Custom>", 1, 0, 0, 0); + } + + public void testLineNumbers6() throws ParserException + { testLineNumber( "<Custom>\n" + *************** *** 73,76 **** --- 94,101 ---- 1, 0, 0, 2 ); + } + + public void testLineNumbers7() throws ParserException + { testLineNumber( "Foo\n" + *************** *** 80,83 **** --- 105,112 ---- 2, 1, 1, 3 ); + } + + public void testLineNumbers8() throws ParserException + { testLineNumber( "Foo\n" + *************** *** 87,90 **** --- 116,123 ---- 2, 1, 1, 3 ); + } + + public void testLineNumbers9() throws ParserException + { char[] oneHundredNewLines = new char[100]; Arrays.fill(oneHundredNewLines, '\n'); *************** *** 117,121 **** CustomTag tag = (CustomTag)node[useNode]; assertEquals("start line", expectedStartLine, tag.getStartingLineNumber ()); ! assertEquals("end line", expectedEndLine, tag.getEndingLineNumber ()); } --- 150,154 ---- CustomTag tag = (CustomTag)node[useNode]; assertEquals("start line", expectedStartLine, tag.getStartingLineNumber ()); ! 
assertEquals("end line", expectedEndLine, tag.getEndTag ().getEndingLineNumber ()); } Index: ParserTestCase.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** ParserTestCase.java 1 Nov 2003 04:03:21 -0000 1.37 --- ParserTestCase.java 1 Nov 2003 21:55:43 -0000 1.38 *************** *** 99,102 **** --- 99,112 ---- } + public Parser getParser () + { + return (parser); + } + + public void setParser (Parser parser) + { + this.parser = parser; + } + public void assertStringEquals(String message, String expected, String actual) { |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv2656/tags Modified Files: BaseHrefTag.java CompositeTag.java FormTag.java ImageTag.java LinkTag.java Tag.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: BaseHrefTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BaseHrefTag.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** BaseHrefTag.java 29 Oct 2003 03:31:17 -0000 1.29 --- BaseHrefTag.java 1 Nov 2003 21:55:43 -0000 1.30 *************** *** 31,34 **** --- 31,35 ---- import java.util.Vector; + import org.htmlparser.lexer.Page; import org.htmlparser.util.LinkProcessor; *************** *** 73,85 **** /** ! * Override this because we need a trigger to set the base HREF on the page. ! * NOTE: setting of the attributes is the last thing done on the tag ! * after creation. ! * @param attribs The new BASE tag attributes. */ ! public void setAttributesEx (Vector attribs) { ! super.setAttributesEx (attribs); ! getPage ().getLinkProcessor ().setBaseUrl (getBaseUrl ()); } } --- 74,89 ---- /** ! * Perform the meaning of this tag. ! * This sets the base URL to use for the rest of the page. */ ! public void doSemanticAction () { ! Page page; ! ! page = getPage (); ! if (null != page) ! { ! page.getLinkProcessor ().setBaseUrl (getBaseUrl ()); ! 
} } } Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.61 retrieving revision 1.62 diff -C2 -d -r1.61 -r1.62 *** CompositeTag.java 26 Oct 2003 19:46:23 -0000 1.61 --- CompositeTag.java 1 Nov 2003 21:55:43 -0000 1.62 *************** *** 45,50 **** */ public abstract class CompositeTag extends Tag { ! protected TagNode startTag; ! protected TagNode endTag; public CompositeTag () --- 45,49 ---- */ public abstract class CompositeTag extends Tag { ! protected TagNode mEndTag; public CompositeTag () *************** *** 58,62 **** public SimpleNodeIterator children () { ! return (getChildren ().elements ()); } --- 57,68 ---- public SimpleNodeIterator children () { ! SimpleNodeIterator ret; ! ! if (null != getChildren ()) ! ret = getChildren ().elements (); ! else ! ret = (new NodeList ()).elements (); ! ! return (ret); } *************** *** 107,114 **** } - public void putStartTagInto(StringBuffer sb) { - sb.append(startTag.toHtml()); - } - protected void putChildrenInto(StringBuffer sb) { --- 113,116 ---- *************** *** 127,139 **** // eliminate virtual tags // if (!(endTag.getStartPosition () == endTag.getEndPosition ())) ! sb.append(endTag.toHtml()); } public String toHtml() { StringBuffer sb = new StringBuffer(); ! putStartTagInto(sb); ! if (!startTag.isEmptyXmlTag()) { putChildrenInto(sb); ! putEndTagInto(sb); } return sb.toString(); --- 129,143 ---- // eliminate virtual tags // if (!(endTag.getStartPosition () == endTag.getEndPosition ())) ! sb.append(getEndTag ().toHtml()); } public String toHtml() { StringBuffer sb = new StringBuffer(); ! sb.append (super.toHtml ()); ! if (!isEmptyXmlTag()) ! { putChildrenInto(sb); ! if (null != getEndTag ()) // this test if for link tags that refuse to scan because there's no HREF attribute ! 
putEndTagInto(sb); } return sb.toString(); *************** *** 324,335 **** if (visitor.shouldRecurseChildren ()) { ! startTag.accept (visitor); ! children = children (); ! while (children.hasMoreNodes ()) { ! child = (Node)children.nextNode (); ! child.accept (visitor); } ! endTag.accept (visitor); } if (visitor.shouldRecurseSelf ()) --- 328,342 ---- if (visitor.shouldRecurseChildren ()) { ! if (null != getChildren ()) { ! children = children (); ! while (children.hasMoreNodes ()) ! { ! child = (Node)children.nextNode (); ! child.accept (visitor); ! } } ! if (null != getEndTag ()) ! getEndTag ().accept (visitor); } if (visitor.shouldRecurseSelf ()) *************** *** 341,362 **** } public TagNode getStartTag() { ! return startTag; } public void setStartTag (TagNode start) { ! startTag = start; } public TagNode getEndTag() { ! return endTag; } public void setEndTag(TagNode end) { ! endTag = end; } --- 348,376 ---- } + /** + * @deprecated The tag *is* ths start tag. + */ public TagNode getStartTag() { ! return (this); } + /** + * @deprecated The tag *is* ths start tag. + */ public void setStartTag (TagNode start) { ! if (null != start) ! throw new IllegalStateException ("the tag *is* ths start tag"); } public TagNode getEndTag() { ! return (mEndTag); } public void setEndTag(TagNode end) { ! mEndTag = end; } *************** *** 390,395 **** return stringNode; } - - - } --- 404,406 ---- Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** FormTag.java 1 Nov 2003 01:36:57 -0000 1.38 --- FormTag.java 1 Nov 2003 21:55:43 -0000 1.39 *************** *** 42,49 **** --- 42,55 ---- public static final String POST="POST"; public static final String GET="GET"; + + /** + * This is the derived form location, based on action. 
+ */ + protected String mFormLocation; public FormTag () { setTagName ("FORM"); + mFormLocation = null; } *************** *** 72,76 **** public String getFormLocation() { ! return (getAttribute("ACTION")); } --- 78,86 ---- public String getFormLocation() { ! if (null == mFormLocation) ! // ... is it true that without an ACTION the default is to send it back to the same page? ! mFormLocation = extractFormLocn (getPage ().getUrl ()); ! ! return (mFormLocation); } *************** *** 82,85 **** --- 92,96 ---- public void setFormLocation(String url) { + mFormLocation = url; setAttribute ("ACTION", url); } *************** *** 179,200 **** else return (getPage ().getLinkProcessor ().extract (formURL, url)); - } - - /** - * Override this because we need a trigger to set the ACTION attribute. - * NOTE: setting of the children is the last thing done on the tag - * after creation. - * @param children The new list of children this node contains. - */ - public void setChildren (NodeList children) - { - String url; - - super.setChildren (children); - - // ... is it true that without an ACTION the default is to send it back to the same page? - url = extractFormLocn(getPage ().getUrl ()); - if (null != url && 0 < url.length()) - setAttribute ("ACTION",url); } } --- 190,193 ---- Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** ImageTag.java 1 Nov 2003 01:36:57 -0000 1.33 --- ImageTag.java 1 Nov 2003 21:55:43 -0000 1.34 *************** *** 167,171 **** { if (null == imageURL) ! imageURL = extractImageLocn (); return (imageURL); } --- 167,172 ---- { if (null == imageURL) ! if (null != getPage ()) ! 
imageURL = getPage ().getLinkProcessor ().extract (extractImageLocn (), getPage().getUrl ()); return (imageURL); } *************** *** 193,212 **** visitor.visitImageTag (this); super.accept (visitor); - } - - /** - * Override this because we need a trigger to set the image URL. - * Need to update the imageURL string in the this tag, - * but not the SRC attribute so toHtml() outputs the right thing. - * NOTE: setting of the attributes is the last thing done on the tag - * after creation. - * @param attribs The new IMG tag attributes. - */ - public void setAttributesEx (Vector attribs) - { - String src; - - super.setAttributesEx (attribs); - imageURL = getPage ().getLinkProcessor ().extract (getImageURL (), getPage().getUrl ()); } } --- 194,197 ---- Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** LinkTag.java 1 Nov 2003 01:36:57 -0000 1.40 --- LinkTag.java 1 Nov 2003 21:55:43 -0000 1.41 *************** *** 139,152 **** public String getLinkText() { ! return (getChildren().toString()); ! } ! /** ! * Return the text contained in this linkinode ! * Kaarle Kaila 23.10.2001 ! */ ! public String getText() ! { ! return toHtml(); } --- 139,150 ---- public String getLinkText() { ! String ret; ! if (null != getChildren()) ! ret = getChildren().toString(); ! else ! ret = ""; ! ! return (ret); } *************** *** 248,252 **** else sb.append(getAccessKey ()+"\n"); ! if (children()!=null) { sb.append(" "+"LinkData\n"); --- 246,250 ---- else sb.append(getAccessKey ()+"\n"); ! 
if (null != getChildren ()) { sb.append(" "+"LinkData\n"); Index: Tag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Tag.java,v retrieving revision 1.54 retrieving revision 1.55 diff -C2 -d -r1.54 -r1.55 *** Tag.java 26 Oct 2003 19:46:24 -0000 1.54 --- Tag.java 1 Nov 2003 21:55:43 -0000 1.55 *************** *** 29,32 **** --- 29,33 ---- package org.htmlparser.tags; + import java.lang.CloneNotSupportedException; import java.util.Enumeration; import java.util.HashSet; *************** *** 50,54 **** * lexer which has nodes). */ ! public class Tag extends TagNode { private TagScanner mScanner; --- 51,55 ---- * lexer which has nodes). */ ! public class Tag extends TagNode implements Cloneable { private TagScanner mScanner; *************** *** 70,73 **** --- 71,79 ---- } + public Object clone() throws CloneNotSupportedException + { + return (super.clone ()); + } + /** * Return the scanner associated with this tag. *************** *** 121,134 **** else ((NodeVisitor)visitor).visitTag (this); - } - - public int getStartingLineNumber () - { - return (getPage ().row (getStartPosition ())); - } - - public int getEndingLineNumber () - { - return (getPage ().row (getEndPosition ())); } } --- 127,130 ---- |
From: <der...@us...> - 2003-11-01 21:55:46
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv2656/scanners Modified Files: BaseHrefScanner.java CompositeTagScanner.java DoctypeScanner.java FrameScanner.java ImageScanner.java InputTagScanner.java JspScanner.java MetaTagScanner.java ScriptScanner.java TagScanner.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: BaseHrefScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BaseHrefScanner.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** BaseHrefScanner.java 29 Oct 2003 03:31:17 -0000 1.29 --- BaseHrefScanner.java 1 Nov 2003 21:55:43 -0000 1.30 *************** *** 59,63 **** } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { BaseHrefTag ret; --- 59,63 ---- } ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { BaseHrefTag ret; Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.78 retrieving revision 1.79 diff -C2 -d -r1.78 -r1.79 *** CompositeTagScanner.java 1 Nov 2003 01:36:57 -0000 1.78 --- CompositeTagScanner.java 1 Nov 2003 21:55:43 -0000 1.79 *************** *** 192,197 **** String name; TagScanner scanner; ! CompositeTag composite; ! Tag ret; nodeList = new NodeList (); --- 192,196 ---- String name; TagScanner scanner; ! 
CompositeTag ret; nodeList = new NodeList (); *************** *** 245,255 **** endTag = createVirtualEndTag (tag, lexer.getPage (), lexer.getCursor ().getPosition ()); ! composite = (CompositeTag)createTag (lexer.getPage (), tag.getStartPosition (), endTag.getEndPosition (), tag.getAttributesEx (), tag, endTag, nodeList); ! composite.setThisScanner (this); ! for (int i = 0; i < composite.getChildCount (); i++) ! composite.childAt (i).setParent (composite); ! ret = composite; - return (ret); } --- 244,255 ---- endTag = createVirtualEndTag (tag, lexer.getPage (), lexer.getCursor ().getPosition ()); ! ret = (CompositeTag)tag; ! ret.setEndTag (endTag); ! ret.setChildren (nodeList); ! for (int i = 0; i < ret.getChildCount (); i++) ! ret.childAt (i).setParent (ret); ! endTag.setParent (ret); ! ret.doSemanticAction (); return (ret); } *************** *** 284,288 **** * For composite tags this shouldn't be used and hence throws an exception. */ ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { throw new ParserException ("composite tags shouldn't be using this"); --- 284,288 ---- * For composite tags this shouldn't be used and hence throws an exception. */ ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { throw new ParserException ("composite tags shouldn't be using this"); Index: DoctypeScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DoctypeScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** DoctypeScanner.java 26 Oct 2003 19:46:19 -0000 1.30 --- DoctypeScanner.java 1 Nov 2003 21:55:43 -0000 1.31 *************** *** 58,62 **** } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { DoctypeTag ret; --- 58,62 ---- } ! 
public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { DoctypeTag ret; Index: FrameScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameScanner.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** FrameScanner.java 26 Oct 2003 19:46:19 -0000 1.31 --- FrameScanner.java 1 Nov 2003 21:55:43 -0000 1.32 *************** *** 70,74 **** } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { FrameTag ret; --- 70,74 ---- } ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { FrameTag ret; Index: ImageScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ImageScanner.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** ImageScanner.java 29 Oct 2003 03:31:17 -0000 1.33 --- ImageScanner.java 1 Nov 2003 21:55:43 -0000 1.34 *************** *** 74,78 **** } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { ImageTag ret; --- 74,78 ---- } ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { ImageTag ret; Index: InputTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/InputTagScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** InputTagScanner.java 26 Oct 2003 19:46:20 -0000 1.28 --- InputTagScanner.java 1 Nov 2003 21:55:43 -0000 1.29 *************** *** 53,57 **** } ! 
protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { InputTag ret; --- 53,57 ---- } ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { InputTag ret; Index: JspScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/JspScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** JspScanner.java 26 Oct 2003 19:46:20 -0000 1.30 --- JspScanner.java 1 Nov 2003 21:55:43 -0000 1.31 *************** *** 57,61 **** } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { JspTag ret; --- 57,61 ---- } ! public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { JspTag ret; Index: MetaTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/MetaTagScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** MetaTagScanner.java 26 Oct 2003 19:46:21 -0000 1.28 --- MetaTagScanner.java 1 Nov 2003 21:55:43 -0000 1.29 *************** *** 48,52 **** } ! protected Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { MetaTag ret; --- 48,52 ---- } ! 
public Tag createTag (Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException { MetaTag ret; Index: ScriptScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v retrieving revision 1.48 retrieving revision 1.49 diff -C2 -d -r1.48 -r1.49 *** ScriptScanner.java 1 Nov 2003 01:36:57 -0000 1.48 --- ScriptScanner.java 1 Nov 2003 21:55:43 -0000 1.49 *************** *** 37,40 **** --- 37,41 ---- import org.htmlparser.lexer.Page; import org.htmlparser.lexer.nodes.NodeFactory; + import org.htmlparser.tags.CompositeTag; import org.htmlparser.tags.ScriptTag; import org.htmlparser.tags.Tag; *************** *** 72,76 **** ret.setEndPosition (end); ret.setAttributesEx (attributes); - ret.setStartTag (startTag); ret.setEndTag (endTag); ret.setChildren (children); --- 73,76 ---- *************** *** 95,99 **** Tag end; NodeFactory factory; ! Tag ret; done = false; --- 95,99 ---- Tag end; NodeFactory factory; ! CompositeTag ret; done = false; *************** *** 163,169 **** if (null == end) end = new Tag (lexer.getPage (), tag.getEndPosition (), tag.getEndPosition (), new Vector ()); ! //TODO: use the factory: ! ret = createTag (lexer.getPage (), tag.getStartPosition (), end.getEndPosition (), tag.getAttributesEx (), tag, end, new NodeList (last)); ! ret.setThisScanner (this); } finally --- 163,172 ---- if (null == end) end = new Tag (lexer.getPage (), tag.getEndPosition (), tag.getEndPosition (), new Vector ()); ! ret = (CompositeTag)tag; ! ret.setEndTag (end); ! ret.setChildren (new NodeList (last)); ! last.setParent (ret); ! end.setParent (ret); ! 
ret.doSemanticAction (); } finally Index: TagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** TagScanner.java 1 Nov 2003 01:36:57 -0000 1.47 --- TagScanner.java 1 Nov 2003 21:55:43 -0000 1.48 *************** *** 134,139 **** Tag ret; ! ret = createTag(lexer.getPage (), tag.getStartPosition (), tag.getEndPosition (), tag.getAttributesEx (), tag, url); ! ret.setThisScanner(this); return (ret); --- 134,139 ---- Tag ret; ! ret = tag; ! ret.doSemanticAction (); return (ret); *************** *** 149,153 **** * @throws ParserException */ ! protected abstract Tag createTag(Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException; public abstract String [] getID(); --- 149,153 ---- * @throws ParserException */ ! public abstract Tag createTag(Page page, int start, int end, Vector attributes, Tag tag, String url) throws ParserException; public abstract String [] getID(); |
From: <der...@us...> - 2003-11-01 21:55:46
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv2656 Modified Files: AbstractNode.java Node.java Parser.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** AbstractNode.java 26 Oct 2003 19:46:17 -0000 1.18 --- AbstractNode.java 1 Nov 2003 21:55:42 -0000 1.19 *************** *** 308,310 **** --- 308,317 ---- } + /** + * Perform the meaning of this tag. + * The default action is to do nothing. + */ + public void doSemanticAction () + { + } } Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** Node.java 26 Oct 2003 19:46:17 -0000 1.42 --- Node.java 1 Nov 2003 21:55:42 -0000 1.43 *************** *** 193,195 **** --- 193,206 ---- */ public void setText(String text); + + /** + * Perform the meaning of this tag. + * This is defined by the tag, for example the bold tag <B> may switch + * bold text on and off. + * Only a few tags have semantic meaning to the parser. These have to do + * with the character set to use (<META>), the base URL to use + * (<BASE>). Other than that, the semantic meaning is up to the + * application. 
+ */ + public void doSemanticAction (); } Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.72 retrieving revision 1.73 diff -C2 -d -r1.72 -r1.73 *** Parser.java 29 Oct 2003 03:31:17 -0000 1.72 --- Parser.java 1 Nov 2003 21:55:42 -0000 1.73 *************** *** 37,40 **** --- 37,41 ---- import java.util.HashMap; import java.util.Hashtable; + import java.util.Iterator; import java.util.Map; import java.util.Vector; *************** *** 43,46 **** --- 44,48 ---- import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; + import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.lexer.nodes.NodeFactory; import org.htmlparser.lexer.nodes.TagNode; *************** *** 52,55 **** --- 54,58 ---- import org.htmlparser.scanners.BodyScanner; import org.htmlparser.scanners.BulletListScanner; + import org.htmlparser.scanners.CompositeTagScanner; import org.htmlparser.scanners.DivScanner; import org.htmlparser.scanners.DoctypeScanner; *************** *** 192,197 **** * The list of scanners to apply at the top level. */ ! protected Map scanners; ! /** * The current scanner when recursing into a tag. --- 195,206 ---- * The list of scanners to apply at the top level. */ ! protected Map mScanners; ! ! /** ! * The list of tags to return at the top level. ! * The list is keyed by tag name. ! */ ! protected Map mBlastocyst; ! /** * The current scanner when recursing into a tag. *************** *** 376,380 **** * Set the connection for this parser. * This method creates a new <code>Lexer</code> reading from the connection. ! * It does not adjust the <code>scanners</code> list * or <code>feedback</code> object. Trying to * set the connection to null is a noop. --- 385,389 ---- * Set the connection for this parser. * This method creates a new <code>Lexer</code> reading from the connection. ! 
* It does not adjust the <code>mScanners</code> list * or <code>feedback</code> object. Trying to * set the connection to null is a noop. *************** *** 407,411 **** * Set the URL for this parser. * This method creates a new Lexer reading from the given URL. ! * It does not adjust the <code>scanners</code> list * or <code>feedback</code> object. Trying to set the url to null or an * empty string is a noop. --- 416,420 ---- * Set the URL for this parser. * This method creates a new Lexer reading from the given URL. ! * It does not adjust the <code>mScanners</code> list * or <code>feedback</code> object. Trying to set the url to null or an * empty string is a noop. *************** *** 453,457 **** /** * Set the lexer for this parser. ! * TIt does not adjust the <code>scanners</code> list * or <code>feedback</code> object. * Trying to set the lexer to <code>null</code> is a noop. --- 462,466 ---- /** * Set the lexer for this parser. ! * TIt does not adjust the <code>mScanners</code> list * or <code>feedback</code> object. * Trying to set the lexer to <code>null</code> is a noop. *************** *** 477,502 **** /** ! * Get the number of scanners registered currently in the scanner. ! * @return int number of scanners registered */ ! public int getNumScanners() { ! return scanners.size(); } /** * This method is to be used to change the set of scanners in the current parser. ! * @param newScanners Vector holding scanner objects to be used during the parsing process. */ ! public void setScanners(Map newScanners) { ! scanners = (null == newScanners) ? new HashMap() : newScanners; } /** ! * Get an enumeration of scanners registered currently in the parser ! * @return Enumeration of scanners currently registered in the parser */ ! public Map getScanners() { ! return scanners; } --- 486,522 ---- /** ! * Get the number of scanners registered currently in the parser. ! * @return int number of scanners registered. */ ! public int getNumScanners() ! { ! 
return mScanners.size(); } /** * This method is to be used to change the set of scanners in the current parser. ! * @param newScanners List of scanner objects to be used during the parsing process. */ ! public void setScanners (Map newScanners) { ! Iterator iterator; ! TagScanner scanner; ! ! flushScanners (); ! if (null != newScanners) ! for (iterator = newScanners.entrySet ().iterator (); iterator.hasNext (); ) ! { ! scanner = (TagScanner)iterator.next (); ! addScanner (scanner); ! } } /** ! * Get the list of scanners registered currently in the parser ! * @return List of scanners currently registered in the parser */ ! public Map getScanners() ! { ! return mScanners; } *************** *** 536,546 **** * It is advantageous to register only the scanners you want, in order to achieve faster parsing speed. This method * would also be of use when you have developed custom scanners, and need to register them into the parser. ! * @param scanner TagScanner object (or derivative) to be added to the list of registered scanners */ public void addScanner(TagScanner scanner) { ! String ids[] = scanner.getID(); ! for (int i=0;i<ids.length;i++) { ! scanners.put(ids[i],scanner); } } --- 556,590 ---- * It is advantageous to register only the scanners you want, in order to achieve faster parsing speed. This method * would also be of use when you have developed custom scanners, and need to register them into the parser. ! * @param scanner TagScanner object (or derivative) to be added to the list of registered scanners. */ public void addScanner(TagScanner scanner) { ! String ids[]; ! Tag tag; ! ! ids = scanner.getID(); ! for (int i = 0; i < ids.length; i++) ! { ! mScanners.put (ids[i], scanner); ! // for now, the only way to create a tag is to ask the scanner... ! try ! { ! if (scanner instanceof CompositeTagScanner) ! { ! tag = ((CompositeTagScanner)scanner).createTag (null, 0, 0, null, null, null, null); ! tag.setThisScanner (scanner); ! mBlastocyst.put (ids[i], tag); ! } ! 
else ! { ! tag = scanner.createTag (null, 0, 0, null, null, null); ! tag.setThisScanner (scanner); ! mBlastocyst.put (ids[i], tag); ! } ! } ! catch (Exception e) ! { ! e.printStackTrace (); ! } } } *************** *** 585,589 **** try { ! if (null == scanners.get ("META")) { addScanner (new MetaTagScanner ("-m")); --- 629,633 ---- try { ! if (null == mScanners.get ("META")) { addScanner (new MetaTagScanner ("-m")); *************** *** 626,630 **** { if (remove_scanner) ! scanners.remove ("META"); } --- 670,674 ---- { if (remove_scanner) ! mScanners.remove ("META"); } *************** *** 633,640 **** /** ! * Flush the current scanners registered. The registered scanners list becomes empty with this call. */ ! public void flushScanners() { ! scanners = new Hashtable(); } --- 677,687 ---- /** ! * Flush the current scanners registered. ! * The registered scanners list becomes empty with this call. */ ! public void flushScanners() ! { ! mScanners = new Hashtable (); ! mBlastocyst = new Hashtable (); } *************** *** 645,650 **** * @return TagScanner The Tag Scanner */ ! public TagScanner getScanner(String id) { ! return (TagScanner)scanners.get(id); } --- 692,707 ---- * @return TagScanner The Tag Scanner */ ! public TagScanner getScanner (String id) ! { ! Tag tag; ! TagScanner ret; ! ! ret = null; ! ! tag = (Tag)mBlastocyst.get (id); ! if (null != tag) ! ret = (TagScanner)tag.getThisScanner (); ! ! return (ret); } *************** *** 686,690 **** /** ! * This method should be invoked in order to register some common scanners. The scanners that get added are : <br> * LinkScanner (filter key "-l")<br> * ImageScanner (filter key "-i")<br> --- 743,748 ---- /** ! * This method should be invoked in order to register some common scanners. ! * The scanners that get added are : <br> * LinkScanner (filter key "-l")<br> * ImageScanner (filter key "-i")<br> *************** *** 709,715 **** */ public void registerScanners() { ! 
if (scanners.size()>0) { System.err.println("registerScanners() should be called first, when no other scanner has been registered."); ! System.err.println("Other scanners already exist, hence this method call wont have any effect"); return; } --- 767,774 ---- */ public void registerScanners() { ! if (mScanners.size()>0) ! { System.err.println("registerScanners() should be called first, when no other scanner has been registered."); ! System.err.println("Other scanners already exist, hence this method call won't have any effect"); return; } *************** *** 758,763 **** * @param scanner TagScanner object to be removed from the list of registered scanners */ ! public void removeScanner(TagScanner scanner) { ! scanners.remove(scanner.getID()[0]); } --- 817,830 ---- * @param scanner TagScanner object to be removed from the list of registered scanners */ ! public void removeScanner(TagScanner scanner) ! { ! String[] ids; ! ! ids = scanner.getID (); ! for (int i = 0; i < ids.length; i++) ! { ! mScanners.remove (ids[i]); ! mBlastocyst.remove (ids[i]); ! } } *************** *** 1029,1033 **** ParserException { ! return (new Tag (page, start, end, attributes)); } } --- 1096,1140 ---- ParserException { ! Attribute attribute; ! String id; ! Tag prototype; ! Tag ret; ! ! ret = null; ! ! if (0 != attributes.size ()) ! { ! attribute = (Attribute)attributes.elementAt (0); ! id = attribute.getName (); ! if (null != id) ! { ! try ! { ! id = id.toUpperCase (); ! if (!id.startsWith ("/")) ! { ! if (id.endsWith ("/")) ! id = id.substring (0, id.length () - 1); ! prototype = (Tag)mBlastocyst.get (id); ! if (null != prototype) ! { ! ret = (Tag)prototype.clone (); ! ret.setPage (page); ! ret.setStartPosition (start); ! ret.setEndPosition (end); ! ret.setAttributesEx (attributes); ! } ! } ! } ! catch (CloneNotSupportedException cnse) ! { ! // default to creating a new one ! } ! } ! } ! if (null == ret) ! ret = new Tag (page, start, end, attributes); ! ! return (ret); } } |
From: <der...@us...> - 2003-11-01 21:55:46
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv2656/lexer/nodes Modified Files: TagNode.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** TagNode.java 1 Nov 2003 01:36:56 -0000 1.21 --- TagNode.java 1 Nov 2003 21:55:42 -0000 1.22 *************** *** 257,269 **** attributes = getAttributesEx (); ! size = attributes.size (); ! for (int i = 0; i < size; i++) { ! attribute = (Attribute)attributes.elementAt (i); ! string = attribute.getName (); ! if ((null != string) && name.equalsIgnoreCase (string)) { ! ret = attribute; ! i = size; // exit fast } } --- 257,272 ---- attributes = getAttributesEx (); ! if (null != attributes) { ! size = attributes.size (); ! for (int i = 0; i < size; i++) { ! attribute = (Attribute)attributes.elementAt (i); ! string = attribute.getName (); ! if ((null != string) && name.equalsIgnoreCase (string)) ! { ! ret = attribute; ! i = size; // exit fast ! } } } *************** *** 809,812 **** --- 812,833 ---- return ((null == raw) ? false : ('/' == raw.charAt (0))); + } + + /** + * Get the line number where this tag starts. + * @return The (zero based) line number in the page where this tag starts. + */ + public int getStartingLineNumber () + { + return (getPage ().row (getStartPosition ())); + } + + /** + * Get the line number where this tag ends. + * @return The (zero based) line number in the page where this tag ends. 
+ */ + public int getEndingLineNumber () + { + return (getPage ().row (getEndPosition ())); } } |
From: <der...@us...> - 2003-11-01 21:55:46
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1:/tmp/cvs-serv2656/nodeDecorators Modified Files: AbstractNodeDecorator.java Log Message: Create nodes by cloning from a list of prototypes in the Parser (NodeFactory). So now, the startTag() is the CompositeTag, and the CompositeTagScanner just adds children. This is an intermediate code drop on the way to integrating the scanners with the tags; the scanners no longer create the tags (but they still create the prototypical ones). Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** AbstractNodeDecorator.java 26 Oct 2003 19:46:18 -0000 1.13 --- AbstractNodeDecorator.java 1 Nov 2003 21:55:42 -0000 1.14 *************** *** 147,149 **** --- 147,152 ---- } + public void doSemanticAction () { + delegate.doSemanticAction (); + } } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv2709/tagTests Modified Files: BaseHrefTagTest.java BodyTagTest.java OptionTagTest.java TitleTagTest.java Log Message: Fixed all tests failing if case sensitivity was turned on. Mostly this involved comparing the output from toHtml() with the string passed to the parser. Now ParserTestCase does case sensitive comparisons. Index: BaseHrefTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BaseHrefTagTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** BaseHrefTagTest.java 26 Oct 2003 19:46:27 -0000 1.32 --- BaseHrefTagTest.java 1 Nov 2003 04:03:21 -0000 1.33 *************** *** 51,61 **** } ! public void testNotHREFBaseTag() throws ParserException { ! createParser("<base target=\"_top\">"); parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue("Should be a base tag but was "+node[0].getClass().getName(),node[0] instanceof BaseHrefTag); BaseHrefTag baseTag = (BaseHrefTag)node[0]; ! assertStringEquals("Base Tag HTML","<BASE TARGET=\"_top\">",baseTag.toHtml()); } --- 51,63 ---- } ! public void testNotHREFBaseTag() throws ParserException ! { ! String html = "<base target=\"_top\">"; ! createParser(html); parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue("Should be a base tag but was "+node[0].getClass().getName(),node[0] instanceof BaseHrefTag); BaseHrefTag baseTag = (BaseHrefTag)node[0]; ! 
assertStringEquals("Base Tag HTML", html, baseTag.toHtml()); } Index: BodyTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BodyTagTest.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** BodyTagTest.java 26 Oct 2003 19:46:27 -0000 1.15 --- BodyTagTest.java 1 Nov 2003 04:03:21 -0000 1.16 *************** *** 47,50 **** --- 47,51 ---- private BodyTag bodyTag; + private String html = "<body>Yahoo!</body>"; public BodyTagTest(String name) { *************** *** 54,58 **** protected void setUp() throws Exception { super.setUp(); ! createParser("<html><head><title>body tag test</title></head><body>Yahoo!</body></html>"); parser.registerScanners(); parser.addScanner(new BodyScanner("-b")); --- 55,59 ---- protected void setUp() throws Exception { super.setUp(); ! createParser("<html><head><title>body tag test</title></head>" + html + "</html>"); parser.registerScanners(); parser.addScanner(new BodyScanner("-b")); *************** *** 68,72 **** public void testToHTML() throws ParserException { ! assertStringEquals("Raw String","<BODY>Yahoo!</BODY>",bodyTag.toHtml()); } --- 69,73 ---- public void testToHTML() throws ParserException { ! assertStringEquals("Raw String", html, bodyTag.toHtml()); } Index: OptionTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/OptionTagTest.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** OptionTagTest.java 28 Oct 2003 03:04:19 -0000 1.36 --- OptionTagTest.java 1 Nov 2003 04:03:21 -0000 1.37 *************** *** 44,62 **** } ! private String testHTML = new String( ! "<OPTION value=\"Google Search\">Google</OPTION>" + ! "<OPTION value=\"AltaVista Search\">AltaVista" + ! "<OPTION value=\"Lycos Search\"></OPTION>" + ! "<OPTION>Yahoo!</OPTION>" + ! 
"<OPTION>\nHotmail</OPTION>" + ! "<OPTION value=\"ICQ Messenger\">" + ! "<OPTION>Mailcity\n</OPTION>"+ ! "<OPTION>\nIndiatimes\n</OPTION>"+ ! "<OPTION>\nRediff\n</OPTION>"+ ! "<OPTION>Cricinfo" + ! "<OPTION value=\"Microsoft Passport\">"+ ! "<OPTION value=\"AOL\"><SPAN>AOL</SPAN></OPTION>" + ! "<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL>" ! ); public OptionTagTest(String name) --- 44,62 ---- } ! private String option1 = "<OPTION value=\"Google Search\">Google</OPTION>"; ! private String option2 = "<OPTION value=\"AltaVista Search\">AltaVista"; ! private String option3 = "<OPTION value=\"Lycos Search\"></OPTION>"; ! private String option4 = "<OPTION>Yahoo!</OPTION>"; ! private String option5 = "<OPTION>\nHotmail</OPTION>"; ! private String option6 = "<OPTION value=\"ICQ Messenger\">"; ! private String option7 = "<OPTION>Mailcity\n</OPTION>"; ! private String option8 = "<OPTION>\nIndiatimes\n</OPTION>"; ! private String option9 = "<OPTION>\nRediff\n</OPTION>"; ! private String option10 = "<OPTION>Cricinfo"; ! private String option11 = "<OPTION value=\"Microsoft Passport\">"; ! private String option12 = "<OPTION value=\"AOL\"><SPAN>AOL</SPAN></OPTION>"; ! private String option13 = "<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL></OPTION>"; ! private String testHTML = option1 + option2 + option3 + option4 + option5 + option6 ! + option7 + option8 + option9 + option10 + option11 + option12 + option13; public OptionTagTest(String name) *************** *** 82,110 **** OptionTag OptionTag; OptionTag = (OptionTag) node[0]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Google Search\">Google</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[1]; ! assertStringEquals("HTML String","<OPTION VALUE=\"AltaVista Search\">AltaVista</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[2]; ! 
assertStringEquals("HTML String","<OPTION VALUE=\"Lycos Search\"></OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[3]; ! assertStringEquals("HTML String","<OPTION>Yahoo!</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[4]; ! assertStringEquals("HTML String","<OPTION>\nHotmail</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[5]; ! assertStringEquals("HTML String","<OPTION VALUE=\"ICQ Messenger\"></OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[6]; ! assertStringEquals("HTML String","<OPTION>Mailcity\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[7]; ! assertStringEquals("HTML String","<OPTION>\nIndiatimes\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[8]; ! assertStringEquals("HTML String","<OPTION>\nRediff\n</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[9]; ! assertStringEquals("HTML String","<OPTION>Cricinfo</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[10]; ! assertStringEquals("HTML String","<OPTION VALUE=\"Microsoft Passport\"></OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[11]; ! assertStringEquals("HTML String","<OPTION VALUE=\"AOL\"><SPAN>AOL</SPAN></OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[12]; ! assertStringEquals("HTML String","<OPTION value=\"Time Warner\">Time <LABEL>Warner <SPAN>AOL </SPAN>Inc.</LABEL></OPTION>",OptionTag.toHtml()); } --- 82,110 ---- OptionTag OptionTag; OptionTag = (OptionTag) node[0]; ! assertStringEquals("HTML String", option1, OptionTag.toHtml()); OptionTag = (OptionTag) node[1]; ! assertStringEquals("HTML String", option2 + "</OPTION>", OptionTag.toHtml()); OptionTag = (OptionTag) node[2]; ! assertStringEquals("HTML String", option3, OptionTag.toHtml()); OptionTag = (OptionTag) node[3]; ! assertStringEquals("HTML String", option4, OptionTag.toHtml()); OptionTag = (OptionTag) node[4]; ! assertStringEquals("HTML String", option5, OptionTag.toHtml()); OptionTag = (OptionTag) node[5]; ! 
assertStringEquals("HTML String", option6 + "</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[6]; ! assertStringEquals("HTML String", option7, OptionTag.toHtml()); OptionTag = (OptionTag) node[7]; ! assertStringEquals("HTML String", option8, OptionTag.toHtml()); OptionTag = (OptionTag) node[8]; ! assertStringEquals("HTML String", option9, OptionTag.toHtml()); OptionTag = (OptionTag) node[9]; ! assertStringEquals("HTML String", option10 + "</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[10]; ! assertStringEquals("HTML String", option11 + "</OPTION>",OptionTag.toHtml()); OptionTag = (OptionTag) node[11]; ! assertStringEquals("HTML String", option12,OptionTag.toHtml()); OptionTag = (OptionTag) node[12]; ! assertStringEquals("HTML String", option13, OptionTag.toHtml()); } Index: TitleTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TitleTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** TitleTagTest.java 26 Oct 2003 19:46:27 -0000 1.30 --- TitleTagTest.java 1 Nov 2003 04:03:21 -0000 1.31 *************** *** 44,47 **** --- 44,53 ---- private TitleTag titleTag; + private String prefix = "<html><head>"; + private String tag1 = "<title>Yahoo!</title>"; + private String tag2 = "<base href=http://www.yahoo.com/ target=_top>"; + private String tag3 = "<meta http-equiv=\"PICS-Label\" content='(PICS-1.1 \"http://www.icra.org/ratingsv02.html\" l r (cz 1 lz 1 nz 1 oz 1 vz 1) gen true for \"http://www.yahoo.com\" r (cz 1 lz 1 nz 1 oz 1 vz 1) \"http://www.rsac.org/ratingsv01.html\" l r (n 0 s 0 v 0 l 0) gen true for \"http://www.yahoo.com\" r (n 0 s 0 v 0 l 0))'>"; + private String tag4 = "<style>a.h{background-color:#ffee99}</style>"; + private String suffix = "</head>"; public TitleTagTest(String name) { *************** *** 50,54 **** protected void setUp() throws Exception { super.setUp(); ! 
createParser("<html><head><title>Yahoo!</title><base href=http://www.yahoo.com/ target=_top><meta http-equiv=\"PICS-Label\" content='(PICS-1.1 \"http://www.icra.org/ratingsv02.html\" l r (cz 1 lz 1 nz 1 oz 1 vz 1) gen true for \"http://www.yahoo.com\" r (cz 1 lz 1 nz 1 oz 1 vz 1) \"http://www.rsac.org/ratingsv01.html\" l r (n 0 s 0 v 0 l 0) gen true for \"http://www.yahoo.com\" r (n 0 s 0 v 0 l 0))'><style>a.h{background-color:#ffee99}</style></head>"); parser.addScanner(new TitleScanner("-t")); parser.addScanner(new StyleScanner("-s")); --- 56,60 ---- protected void setUp() throws Exception { super.setUp(); ! createParser(prefix + tag1 + tag2 + tag3 + tag4 + suffix); parser.addScanner(new TitleScanner("-t")); parser.addScanner(new StyleScanner("-s")); *************** *** 64,68 **** public void testToHTML() throws ParserException { ! assertStringEquals("Raw String","<TITLE>Yahoo!</TITLE>",titleTag.toHtml()); } --- 70,74 ---- public void testToHTML() throws ParserException { ! assertStringEquals("Raw String",tag1,titleTag.toHtml()); } |
From: <der...@us...> - 2003-11-01 04:03:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1:/tmp/cvs-serv2709/lexerTests Modified Files: TagTests.java Log Message: Fixed all tests failing if case sensitivity was turned on. Mostly this involved comparing the output from toHtml() with the string passed to the parser. Now ParserTestCase does case sensitive comparisons. Index: TagTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/TagTests.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** TagTests.java 26 Oct 2003 19:46:25 -0000 1.3 --- TagTests.java 1 Nov 2003 04:03:21 -0000 1.4 *************** *** 103,108 **** } ! public void testEmptyTag() throws Exception { ! createParser("<custom/>"); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); --- 103,110 ---- } ! public void testEmptyTag() throws Exception ! { ! String html = "<custom/>"; ! createParser(html); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); *************** *** 112,116 **** assertStringEquals( "html", ! "<CUSTOM/>", tag.toHtml() ); --- 114,118 ---- assertStringEquals( "html", ! html, tag.toHtml() ); *************** *** 400,409 **** public void testStandAloneToHTML () throws ParserException { ! createParser("<input disabled>"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; ! String html = tag.toHtml (); ! assertStringEquals ("html","<INPUT DISABLED>", html); } --- 402,411 ---- public void testStandAloneToHTML () throws ParserException { ! String html = "<input disabled>"; ! createParser(html); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; ! assertStringEquals ("html", html, tag.toHtml ()); } *************** *** 413,422 **** public void testMissingValueToHTML () throws ParserException { ! 
createParser("<input disabled=>"); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; ! String html = tag.toHtml (); ! assertStringEquals ("html","<INPUT DISABLED=>", html); } } --- 415,424 ---- public void testMissingValueToHTML () throws ParserException { ! String html = "<input disabled=>"; ! createParser(html); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; ! assertStringEquals ("html", html, tag.toHtml ()); } } |
From: <der...@us...> - 2003-11-01 04:03:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv2709/scannersTests Modified Files: CompositeTagScannerTest.java LabelScannerTest.java ScriptScannerTest.java Log Message: Fixed all tests failing if case sensitivity was turned on. Mostly this involved comparing the output from toHtml() with the string passed to the parser. Now ParserTestCase does case sensitive comparisons. Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** CompositeTagScannerTest.java 1 Nov 2003 01:36:57 -0000 1.49 --- CompositeTagScannerTest.java 1 Nov 2003 04:03:21 -0000 1.50 *************** *** 411,420 **** } ! public void testCompositeTagWithSelfChildren() throws ParserException { ! createParser( ! "<custom>" + ! "<custom>something</custom>" + ! "</custom>" ! ); parser.addScanner(new CustomScanner(false)); parser.addScanner(new AnotherScanner()); --- 411,420 ---- } ! public void testCompositeTagWithSelfChildren() throws ParserException ! { ! String tag1 = "<custom>"; ! String tag2 = "<custom>something</custom>"; ! String tag3 = "</custom>"; ! createParser(tag1 + tag2 + tag3); parser.addScanner(new CustomScanner(false)); parser.addScanner(new AnotherScanner()); *************** *** 427,431 **** assertStringEquals( "first custom tag html", ! "<CUSTOM></CUSTOM>", customTag.toHtml() ); --- 427,431 ---- assertStringEquals( "first custom tag html", ! tag1 + "</custom>", customTag.toHtml() ); *************** *** 433,437 **** assertStringEquals( "second custom tag html", ! "<CUSTOM>something</CUSTOM>", customTag.toHtml() ); --- 433,437 ---- assertStringEquals( "second custom tag html", ! tag2, customTag.toHtml() ); *************** *** 439,443 **** assertStringEquals( "third custom tag html", ! 
"</CUSTOM>", endTag.toHtml() ); --- 439,443 ---- assertStringEquals( "third custom tag html", ! tag3, endTag.toHtml() ); *************** *** 445,453 **** public void testParentConnections() throws ParserException { ! createParser( ! "<custom>" + ! "<custom>something</custom>" + ! "</custom>" ! ); parser.addScanner(new CustomScanner(false)); parser.addScanner(new AnotherScanner()); --- 445,452 ---- public void testParentConnections() throws ParserException { ! String tag1 = "<custom>"; ! String tag2 = "<custom>something</custom>"; ! String tag3 = "</custom>"; ! createParser(tag1 + tag2 + tag3); parser.addScanner(new CustomScanner(false)); parser.addScanner(new AnotherScanner()); *************** *** 458,462 **** assertStringEquals( "first custom tag html", ! "<CUSTOM></CUSTOM>", customTag.toHtml() ); --- 457,461 ---- assertStringEquals( "first custom tag html", ! tag1 + "</custom>", customTag.toHtml() ); *************** *** 469,473 **** assertStringEquals( "second custom tag html", ! "<CUSTOM>something</CUSTOM>", customTag.toHtml() ); --- 468,472 ---- assertStringEquals( "second custom tag html", ! tag2, customTag.toHtml() ); *************** *** 486,490 **** assertStringEquals( "third custom tag html", ! "</CUSTOM>", endTag.toHtml() ); --- 485,489 ---- assertStringEquals( "third custom tag html", ! tag3, endTag.toHtml() ); Index: LabelScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/LabelScannerTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** LabelScannerTest.java 26 Oct 2003 19:46:26 -0000 1.40 --- LabelScannerTest.java 1 Nov 2003 04:03:21 -0000 1.41 *************** *** 49,54 **** super(name); } ! public void testSimpleLabels() throws ParserException { ! 
createParser("<label>This is a label tag</label>"); LabelScanner labelScanner = new LabelScanner("-l"); parser.addScanner(labelScanner); --- 49,56 ---- super(name); } ! public void testSimpleLabels() throws ParserException ! { ! String html = "<label>This is a label tag</label>"; ! createParser(html); LabelScanner labelScanner = new LabelScanner("-l"); parser.addScanner(labelScanner); *************** *** 59,63 **** assertEquals("Label","This is a label tag",labelTag.getChildrenHTML()); assertEquals("Label","This is a label tag",labelTag.getLabel()); ! assertStringEquals("Label","<LABEL>This is a label tag</LABEL>",labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } --- 61,65 ---- assertEquals("Label","This is a label tag",labelTag.getChildrenHTML()); assertEquals("Label","This is a label tag",labelTag.getLabel()); ! assertStringEquals("Label", html, labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } *************** *** 77,82 **** } ! public void testLabelWithOtherTags() throws ParserException { ! createParser("<label><span>Span within label</span></label>"); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); --- 79,86 ---- } ! public void testLabelWithOtherTags() throws ParserException ! { ! String html = "<label><span>Span within label</span></label>"; ! createParser(html); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); *************** *** 87,91 **** LabelTag labelTag = (LabelTag) node[0]; assertEquals("Label value","Span within label",labelTag.getLabel()); ! assertStringEquals("Label","<LABEL><SPAN>Span within label</SPAN></LABEL>",labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } --- 91,95 ---- LabelTag labelTag = (LabelTag) node[0]; assertEquals("Label value","Span within label",labelTag.getLabel()); ! 
assertStringEquals("Label", html, labelTag.toHtml()); assertEquals("Label Scanner",labelScanner,labelTag.getThisScanner()); } *************** *** 108,113 **** ! public void testLabelsID() throws ParserException { ! createParser("<label>John Doe</label>"); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); --- 112,119 ---- ! public void testLabelsID() throws ParserException ! { ! String html = "<label>John Doe</label>"; ! createParser(html); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); *************** *** 115,127 **** parseAndAssertNodeCount(1); assertTrue(node[0] instanceof LabelTag); - LabelTag labelTag = (LabelTag) node[0]; ! assertStringEquals("Label","<LABEL>John Doe</LABEL>",labelTag.toHtml()); Hashtable attr = labelTag.getAttributes(); assertNull("ID",attr.get("id")); } ! public void testNestedLabels() throws ParserException { ! createParser("<label id=\"attr1\"><label>Jane Doe"); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); --- 121,135 ---- parseAndAssertNodeCount(1); assertTrue(node[0] instanceof LabelTag); LabelTag labelTag = (LabelTag) node[0]; ! assertStringEquals("Label", html, labelTag.toHtml()); Hashtable attr = labelTag.getAttributes(); assertNull("ID",attr.get("id")); } ! public void testNestedLabels() throws ParserException ! { ! String label1 = "<label id=\"attr1\">"; ! String label2 = "<label>Jane Doe"; ! createParser(label1 + label2); parser.registerScanners(); LabelScanner labelScanner = new LabelScanner("-l"); *************** *** 130,158 **** assertTrue(node[0] instanceof LabelTag); assertTrue(node[1] instanceof LabelTag); - LabelTag labelTag = (LabelTag) node[0]; ! assertStringEquals("Label","<LABEL ID=\"attr1\"></LABEL>",labelTag.toHtml()); labelTag = (LabelTag) node[1]; ! assertStringEquals("Label","<LABEL>Jane Doe</LABEL>",labelTag.toHtml()); Hashtable attr = labelTag.getAttributes(); assertNull("ID",attr.get("id")); } ! 
public void testNestedLabels2() throws ParserException { ! String testHTML = new String( ! "<LABEL value=\"Google Search\">Google</LABEL>" + ! "<LABEL value=\"AltaVista Search\">AltaVista" + ! "<LABEL value=\"Lycos Search\"></LABEL>" + ! "<LABEL>Yahoo!</LABEL>" + ! "<LABEL>\nHotmail</LABEL>" + ! "<LABEL value=\"ICQ Messenger\">" + ! "<LABEL>Mailcity\n</LABEL>"+ ! "<LABEL>\nIndiatimes\n</LABEL>"+ ! "<LABEL>\nRediff\n</LABEL>"+ ! "<LABEL>Cricinfo" + ! "<LABEL value=\"Microsoft Passport\">" + ! "<LABEL value=\"AOL\"><SPAN>AOL</SPAN></LABEL>" + ! "<LABEL value=\"Time Warner\">Time <B>Warner <SPAN>AOL </SPAN>Inc.</B>" ! ); createParser(testHTML); //parser.registerScanners(); --- 138,166 ---- assertTrue(node[0] instanceof LabelTag); assertTrue(node[1] instanceof LabelTag); LabelTag labelTag = (LabelTag) node[0]; ! assertStringEquals("Label", label1 + "</label>", labelTag.toHtml()); labelTag = (LabelTag) node[1]; ! assertStringEquals("Label", label2 + "</label>",labelTag.toHtml()); Hashtable attr = labelTag.getAttributes(); assertNull("ID",attr.get("id")); } ! public void testNestedLabels2() throws ParserException ! { ! String label1 = "<LABEL value=\"Google Search\">Google</LABEL>"; ! String label2 = "<LABEL value=\"AltaVista Search\">AltaVista"; ! String label3 = "<LABEL value=\"Lycos Search\"></LABEL>"; ! String label4 = "<LABEL>Yahoo!</LABEL>"; ! String label5 = "<LABEL>\nHotmail</LABEL>"; ! String label6 = "<LABEL value=\"ICQ Messenger\">"; ! String label7 = "<LABEL>Mailcity\n</LABEL>"; ! String label8 = "<LABEL>\nIndiatimes\n</LABEL>"; ! String label9 = "<LABEL>\nRediff\n</LABEL>"; ! String label10 = "<LABEL>Cricinfo"; ! String label11 = "<LABEL value=\"Microsoft Passport\">"; ! String label12 = "<LABEL value=\"AOL\"><SPAN>AOL</SPAN></LABEL>"; ! String label13 = "<LABEL value=\"Time Warner\">Time <B>Warner <SPAN>AOL </SPAN>Inc.</B>"; ! String testHTML = label1 + label2 + label3 + label4 + label5 + label6 ! 
+ label7 + label8 + label9 + label10 + label11 + label12 + label13; createParser(testHTML); //parser.registerScanners(); *************** *** 160,198 **** parser.addScanner(labelScanner); parseAndAssertNodeCount(13); - - // for(int j=0;j<nodeCount;j++) - // { - // //assertTrue("Node " + j + " should be Label Tag",node[j] instanceof LabelTag); - // System.out.println(node[j].getClass().getName()); - // System.out.println(node[j].toHtml()); - // } - LabelTag LabelTag; LabelTag = (LabelTag) node[0]; ! assertStringEquals("HTML String","<LABEL VALUE=\"Google Search\">Google</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[1]; ! assertStringEquals("HTML String","<LABEL VALUE=\"AltaVista Search\">AltaVista</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[2]; ! assertStringEquals("HTML String","<LABEL VALUE=\"Lycos Search\"></LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[3]; ! assertStringEquals("HTML String","<LABEL>Yahoo!</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[4]; ! assertStringEquals("HTML String","<LABEL>\nHotmail</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[5]; ! assertStringEquals("HTML String","<LABEL VALUE=\"ICQ Messenger\"></LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[6]; ! assertStringEquals("HTML String","<LABEL>Mailcity\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[7]; ! assertStringEquals("HTML String","<LABEL>\nIndiatimes\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[8]; ! assertStringEquals("HTML String","<LABEL>\nRediff\n</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[9]; ! assertStringEquals("HTML String","<LABEL>Cricinfo</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[10]; ! assertStringEquals("HTML String","<LABEL VALUE=\"Microsoft Passport\"></LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[11]; ! assertStringEquals("HTML String","<LABEL VALUE=\"AOL\"><SPAN>AOL</SPAN></LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[12]; ! 
assertStringEquals("HTML String","<LABEL VALUE=\"Time Warner\">Time <B>Warner <SPAN>AOL </SPAN>Inc.</B></LABEL>",LabelTag.toHtml()); } --- 168,198 ---- parser.addScanner(labelScanner); parseAndAssertNodeCount(13); LabelTag LabelTag; LabelTag = (LabelTag) node[0]; ! assertStringEquals("HTML String", label1, LabelTag.toHtml()); LabelTag = (LabelTag) node[1]; ! assertStringEquals("HTML String", label2 + "</LABEL>", LabelTag.toHtml()); LabelTag = (LabelTag) node[2]; ! assertStringEquals("HTML String", label3, LabelTag.toHtml()); LabelTag = (LabelTag) node[3]; ! assertStringEquals("HTML String", label4, LabelTag.toHtml()); LabelTag = (LabelTag) node[4]; ! assertStringEquals("HTML String", label5, LabelTag.toHtml()); LabelTag = (LabelTag) node[5]; ! assertStringEquals("HTML String", label6 + "</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[6]; ! assertStringEquals("HTML String", label7, LabelTag.toHtml()); LabelTag = (LabelTag) node[7]; ! assertStringEquals("HTML String", label8, LabelTag.toHtml()); LabelTag = (LabelTag) node[8]; ! assertStringEquals("HTML String", label9, LabelTag.toHtml()); LabelTag = (LabelTag) node[9]; ! assertStringEquals("HTML String", label10 + "</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[10]; ! assertStringEquals("HTML String", label11 + "</LABEL>",LabelTag.toHtml()); LabelTag = (LabelTag) node[11]; ! assertStringEquals("HTML String", label12, LabelTag.toHtml()); LabelTag = (LabelTag) node[12]; ! 
assertStringEquals("HTML String", label13 + "</LABEL>",LabelTag.toHtml()); } Index: ScriptScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ScriptScannerTest.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** ScriptScannerTest.java 27 Oct 2003 02:18:05 -0000 1.45 --- ScriptScannerTest.java 1 Nov 2003 04:03:21 -0000 1.46 *************** *** 533,542 **** * See bug #741769 ScriptScanner doesn't handle quoted </script> tags */ ! public void testScanQuotedEndTag() throws ParserException { ! createParser("<SCRIPT language=\"JavaScript\">document.write('</SCRIPT>');</SCRIPT>"); parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); ! String s = node[0].toHtml (); ! assertStringEquals ("Parse error","<SCRIPT LANGUAGE=\"JavaScript\">document.write('</SCRIPT>');</SCRIPT>",s); } --- 533,543 ---- * See bug #741769 ScriptScanner doesn't handle quoted </script> tags */ ! public void testScanQuotedEndTag() throws ParserException ! { ! String html = "<SCRIPT language=\"JavaScript\">document.write('</SCRIPT>');</SCRIPT>"; ! createParser(html); parser.addScanner(new ScriptScanner("-s")); parseAndAssertNodeCount(1); ! assertStringEquals ("Parse error", html, node[0].toHtml ()); } |
From: <der...@us...> - 2003-11-01 04:03:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv2709 Modified Files: ParserTestCase.java Log Message: Fixed all tests failing if case sensitivity was turned on. Mostly this involved comparing the output from toHtml() with the string passed to the parser. Now ParserTestCase does case sensitive comparisons. Index: ParserTestCase.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** ParserTestCase.java 26 Oct 2003 19:46:25 -0000 1.36 --- ParserTestCase.java 1 Nov 2003 04:03:21 -0000 1.37 *************** *** 56,60 **** public class ParserTestCase extends TestCase { ! static boolean mCaseInsensitiveComparisons = true; protected Parser parser; protected Node node []; --- 56,60 ---- public class ParserTestCase extends TestCase { ! static boolean mCaseInsensitiveComparisons = false; protected Parser parser; protected Node node []; |
From: <der...@us...> - 2003-11-01 02:22:11
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv22279/tagTests Modified Files: TagTest.java Log Message: Convert native characters in tests to unicode. Mostly this was the division sign (\u00f7) used in tests of character entity reference translation, but other accented characters in TagTest were also problematic. This only covers the files observed to cause failed testcases when compiled and run under the en_US.UTF-8 native encoding, i.e. non ASCII. Files were converted with the native2ascii tool supplied with the JDK. Index: TagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/TagTest.java,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** TagTest.java 26 Oct 2003 19:46:27 -0000 1.51 --- TagTest.java 1 Nov 2003 02:22:08 -0000 1.52 *************** *** 488,492 **** public void testIncorrectInvertedCommas2() throws ParserException { ! String guts = "META NAME=\"Keywords\" CONTENT=Moscou, modernisation, politique urbaine, spécificités culturelles, municipalité, Moscou, modernisation, urban politics, cultural specificities, municipality\""; String testHTML = "<" + guts + ">"; createParser(testHTML); --- 488,492 ---- public void testIncorrectInvertedCommas2() throws ParserException { ! String guts = "META NAME=\"Keywords\" CONTENT=Moscou, modernisation, politique urbaine, sp\u00e9cificit\u00e9s culturelles, municipalit\u00e9, Moscou, modernisation, urban politics, cultural specificities, municipality\""; String testHTML = "<" + guts + ">"; createParser(testHTML); *************** *** 498,507 **** public void testIncorrectInvertedCommas3() throws ParserException { ! 
String testHTML = new String("<meta name=\"description\" content=\"Une base de données sur les thèses de g\"ographie soutenues en France \">"); createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertEquals("Node contents","meta name=\"description\" content=\"Une base de données sur les thèses de g\"ographie soutenues en France \"",tag.getText()); } --- 498,507 ---- public void testIncorrectInvertedCommas3() throws ParserException { ! String testHTML = new String("<meta name=\"description\" content=\"Une base de donn\u00e9es sur les th\u00e8ses de g\"ographie soutenues en France \">"); createParser(testHTML); parseAndAssertNodeCount(1); assertTrue("Node should be a tag",node[0] instanceof Tag); Tag tag = (Tag)node[0]; ! assertEquals("Node contents","meta name=\"description\" content=\"Une base de donn\u00e9es sur les th\u00e8ses de g\"ographie soutenues en France \"",tag.getText()); } |
From: <der...@us...> - 2003-11-01 02:22:11
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1:/tmp/cvs-serv22279/utilTests Modified Files: CharacterTranslationTest.java Log Message: Convert native characters in tests to unicode. Mostly this was the division sign (\u00f7) used in tests of character entity reference translation, but other accented characters in TagTest were also problematic. This only covers the files observed to cause failed testcases when compiled and run under the en_US.UTF-8 native encoding, i.e. non ASCII. Files were converted with the native2ascii tool supplied with the JDK. Index: CharacterTranslationTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** CharacterTranslationTest.java 26 Oct 2003 19:46:27 -0000 1.34 --- CharacterTranslationTest.java 1 Nov 2003 02:22:08 -0000 1.35 *************** *** 48,52 **** assertEquals ( "character entity reference at start of string doesn't work", ! "÷ is the division sign.", Translate.decode ("&divide; is the division sign.")); } --- 48,52 ---- assertEquals ( "character entity reference at start of string doesn't work", ! "\u00f7 is the division sign.", Translate.decode ("&divide; is the division sign.")); } *************** *** 56,60 **** assertEquals ( "numeric character reference at start of string doesn't work", ! "÷ is the division sign.", Translate.decode ("&#247; is the division sign.")); } --- 56,60 ---- assertEquals ( "numeric character reference at start of string doesn't work", ! "\u00f7 is the division sign.", Translate.decode ("&#247; is the division sign.")); } *************** *** 64,68 **** assertEquals ( "character entity reference without a semicolon at start of string doesn't work", ! 
"÷ is the division sign.", Translate.decode ("&divide is the division sign.")); } --- 64,68 ---- assertEquals ( "character entity reference without a semicolon at start of string doesn't work", ! "\u00f7 is the division sign.", Translate.decode ("&divide is the division sign.")); } *************** *** 72,76 **** assertEquals ( "numeric character reference without a semicolon at start of string doesn't work", ! "÷ is the division sign.", Translate.decode ("&#247 is the division sign.")); } --- 72,76 ---- assertEquals ( "numeric character reference without a semicolon at start of string doesn't work", ! "\u00f7 is the division sign.", Translate.decode ("&#247 is the division sign.")); } *************** *** 80,85 **** assertEquals ( "character entity reference at end of string doesn't work", ! "The division sign (÷) is ÷", ! Translate.decode ("The division sign (÷) is &divide;")); } --- 80,85 ---- assertEquals ( "character entity reference at end of string doesn't work", ! "The division sign (\u00f7) is \u00f7", ! Translate.decode ("The division sign (\u00f7) is &divide;")); } *************** *** 88,93 **** assertEquals ( "numeric character reference at end of string doesn't work", ! "The division sign (÷) is ÷", ! Translate.decode ("The division sign (÷) is &#247;")); } --- 88,93 ---- assertEquals ( "numeric character reference at end of string doesn't work", ! "The division sign (\u00f7) is \u00f7", ! Translate.decode ("The division sign (\u00f7) is &#247;")); } *************** *** 96,101 **** assertEquals ( "character entity reference without a semicolon at end of string doesn't work", ! "The division sign (÷) is ÷", ! Translate.decode ("The division sign (÷) is &divide")); } --- 96,101 ---- assertEquals ( "character entity reference without a semicolon at end of string doesn't work", ! "The division sign (\u00f7) is \u00f7", ! 
Translate.decode ("The division sign (\u00f7) is &divide")); } *************** *** 104,109 **** assertEquals ( "numeric character reference without a semicolon at end of string doesn't work", ! 
"The division sign (÷) is ÷", ! Translate.decode ("The division sign (÷) is &#247")); } --- 104,109 ---- assertEquals ( "numeric character reference without a semicolon at end of string doesn't work", ! "The division sign (\u00f7) is \u00f7", ! Translate.decode ("The division sign (\u00f7) is &#247")); } *************** *** 112,117 **** assertEquals ( "character references within a string don't work", ! "Thus, the character entity reference ÷ is a more convenient form than ÷ for obtaining the division sign (÷)", ! Translate.decode ("Thus, the character entity reference &divide; is a more convenient form than &#247; for obtaining the division sign (÷)")); } --- 112,117 ---- assertEquals ( "character references within a string don't work", ! "Thus, the character entity reference \u00f7 is a more convenient form than \u00f7 for obtaining the division sign (\u00f7)", ! Translate.decode ("Thus, the character entity reference &divide; is a more convenient form than &#247; for obtaining the division sign (\u00f7)")); } *************** *** 137,141 **** "encode doesn't work", "Character entity reference: &divide;, another: &nbsp;, numeric character reference: &#9831;.", ! Translate.encode ("Character entity reference: ÷, another: \u00a0, numeric character reference: \u2667.")); } --- 137,141 ---- "encode doesn't work", "Character entity reference: &divide;, another: &nbsp;, numeric character reference: &#9831;.", ! Translate.encode ("Character entity reference: \u00f7, another: \u00a0, numeric character reference: \u2667.")); } |
From: <der...@us...> - 2003-11-01 02:22:11
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests In directory sc8-pr-cvs1:/tmp/cvs-serv22279/nodeDecoratorTests Modified Files: DecodingNodeTest.java Log Message: Convert native characters in tests to unicode. Mostly this was the division sign (\u00f7) used in tests of character entity reference translation, but other accented characters in TagTest were also problematic. This only covers the files observed to cause failed testcases when compiled and run under the en_US.UTF-8 native encoding, i.e. non ASCII. Files were converted with the native2ascii tool supplied with the JDK. Index: DecodingNodeTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/nodeDecoratorTests/DecodingNodeTest.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** DecodingNodeTest.java 26 Oct 2003 19:46:26 -0000 1.14 --- DecodingNodeTest.java 1 Nov 2003 02:22:07 -0000 1.15 *************** *** 80,84 **** String DECODED_DIVISION_SIGN = ! "÷ is the division sign."; assertEquals( --- 80,84 ---- String DECODED_DIVISION_SIGN = ! "\u00f7 is the division sign."; assertEquals( *************** *** 92,100 **** String ENCODED_REFERENCE_IN_STRING = "Thus, the character entity reference &divide; is a more convenient" + ! " form than &#247; for obtaining the division sign (÷)"; String DECODED_REFERENCE_IN_STRING = ! "Thus, the character entity reference ÷ is a more convenient" + ! " form than ÷ for obtaining the division sign (÷)"; assertEquals ( --- 92,100 ---- String ENCODED_REFERENCE_IN_STRING = "Thus, the character entity reference &divide; is a more convenient" + ! " form than &#247; for obtaining the division sign (\u00f7)"; String DECODED_REFERENCE_IN_STRING = ! "Thus, the character entity reference \u00f7 is a more convenient" + ! " form than \u00f7 for obtaining the division sign (\u00f7)"; assertEquals ( |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv16662/tags Modified Files: DoctypeTag.java FormTag.java FrameSetTag.java FrameTag.java ImageTag.java JspTag.java LinkTag.java Log Message: Remove deprecated method calls: elementBegin()->getStartPosition() elementEnd()->getEndPosition() Index: DoctypeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/DoctypeTag.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** DoctypeTag.java 26 Oct 2003 19:46:23 -0000 1.31 --- DoctypeTag.java 1 Nov 2003 01:36:57 -0000 1.32 *************** *** 47,51 **** String guts = toHtml(); guts = guts.substring (1, guts.length () - 2); ! return "Doctype Tag : "+guts+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } } --- 47,51 ---- String guts = toHtml(); guts = guts.substring (1, guts.length () - 2); ! return "Doctype Tag : "+guts+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); } } Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** FormTag.java 29 Oct 2003 03:31:17 -0000 1.37 --- FormTag.java 1 Nov 2003 01:36:57 -0000 1.38 *************** *** 161,165 **** public String toString() { ! return "FORM TAG : Form at "+getFormLocation()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } --- 161,165 ---- public String toString() { ! 
return "FORM TAG : Form at "+getFormLocation()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); } Index: FrameSetTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameSetTag.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** FrameSetTag.java 26 Oct 2003 19:46:23 -0000 1.29 --- FrameSetTag.java 1 Nov 2003 01:36:57 -0000 1.30 *************** *** 48,52 **** public String toString() { ! return "FRAMESET TAG : begins at : "+elementBegin()+"; ends at : "+elementEnd(); } --- 48,52 ---- public String toString() { ! return "FRAMESET TAG : begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); } Index: FrameTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameTag.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** FrameTag.java 26 Oct 2003 19:46:23 -0000 1.28 --- FrameTag.java 1 Nov 2003 01:36:57 -0000 1.29 *************** *** 64,68 **** public String toString() { ! return "FRAME TAG : Frame " +getFrameName() + " at "+getFrameLocation()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } } --- 64,68 ---- public String toString() { ! return "FRAME TAG : Frame " +getFrameName() + " at "+getFrameLocation()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); } } Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** ImageTag.java 29 Oct 2003 03:31:17 -0000 1.32 --- ImageTag.java 1 Nov 2003 01:36:57 -0000 1.33 *************** *** 173,177 **** public String toString() { ! 
return "IMAGE TAG : Image at " + getImageURL () +"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } --- 173,177 ---- public String toString() { ! return "IMAGE TAG : Image at " + getImageURL () +"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); } Index: JspTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/JspTag.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** JspTag.java 26 Oct 2003 19:46:24 -0000 1.33 --- JspTag.java 1 Nov 2003 01:36:57 -0000 1.34 *************** *** 49,53 **** String guts = toHtml(); guts = guts.substring (1, guts.length () - 2); ! return "JSP/ASP Tag : "+guts+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } } --- 49,53 ---- String guts = toHtml(); guts = guts.substring (1, guts.length () - 2); ! return "JSP/ASP Tag : "+guts+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition (); } } Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** LinkTag.java 29 Oct 2003 03:31:17 -0000 1.39 --- LinkTag.java 1 Nov 2003 01:36:57 -0000 1.40 *************** *** 243,247 **** { StringBuffer sb = new StringBuffer(); ! sb.append("Link to : "+ getLink() + "; titled : "+getLinkText ()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+ ", AccessKey="); if (getAccessKey ()==null) sb.append("null\n"); --- 243,247 ---- { StringBuffer sb = new StringBuffer(); ! sb.append("Link to : "+ getLink() + "; titled : "+getLinkText ()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition ()+ ", AccessKey="); if (getAccessKey ()==null) sb.append("null\n"); |
From: <der...@us...> - 2003-11-01 01:37:01
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv16662 Modified Files: RemarkNode.java StringNode.java Log Message: Remove deprecated method calls: elementBegin()->getStartPosition() elementEnd()->getEndPosition() Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNode.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** RemarkNode.java 26 Oct 2003 19:46:17 -0000 1.34 --- RemarkNode.java 1 Nov 2003 01:36:56 -0000 1.35 *************** *** 77,81 **** public String toString() { ! return "Comment Tag : "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n"; } --- 77,92 ---- public String toString() { ! StringBuffer ret; ! ! ret = new StringBuffer (1024); ! ret.append ("Comment Tag : "); ! ret.append (getText ()); ! ret.append ("; begins at : "); ! ret.append (getStartPosition ()); ! ret.append ("; ends at : "); ! ret.append (getEndPosition ()); ! ret.append ("\n"); ! ! return (ret.toString ()); } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** StringNode.java 26 Oct 2003 19:46:17 -0000 1.42 --- StringNode.java 1 Nov 2003 01:36:56 -0000 1.43 *************** *** 70,75 **** } ! public String toString() { ! return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } --- 70,86 ---- } ! public String toString() ! { ! StringBuffer ret; ! ! ret = new StringBuffer (1024); ! ret.append ("Text = "); ! ret.append (getText ()); ! ret.append ("; begins at : "); ! ret.append (getStartPosition ()); ! ret.append ("; ends at : "); ! ret.append (getEndPosition ()); ! ! return (ret.toString ()); } |
From: <der...@us...> - 2003-11-01 01:37:01
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv16662/tests/tagTests Modified Files: EndTagTest.java ScriptTagTest.java Log Message: Remove deprecated method calls: elementBegin()->getStartPosition() elementEnd()->getEndPosition() Index: EndTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/EndTagTest.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** EndTagTest.java 26 Oct 2003 19:46:27 -0000 1.33 --- EndTagTest.java 1 Nov 2003 01:36:57 -0000 1.34 *************** *** 66,71 **** Tag endTag = (Tag)node[2]; assertTrue("Node should be an end Tag",endTag.isEndTag ()); ! assertEquals("endtag element begin",pos,endTag.elementBegin()); ! assertEquals("endtag element end",pos+9,endTag.elementEnd()); } } --- 66,71 ---- Tag endTag = (Tag)node[2]; assertTrue("Node should be an end Tag",endTag.isEndTag ()); ! assertEquals("endtag element begin",pos,endTag.getStartPosition ()); ! assertEquals("endtag element end",pos+9,endTag.getEndPosition ()); } } Index: ScriptTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ScriptTagTest.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** ScriptTagTest.java 26 Oct 2003 19:46:27 -0000 1.36 --- ScriptTagTest.java 1 Nov 2003 01:36:57 -0000 1.37 *************** *** 66,71 **** assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ScriptTag scriptTag = (ScriptTag)node[0]; ! assertEquals("Script Tag Begin",0,scriptTag.elementBegin()); ! 
assertEquals("Script Tag End",28,scriptTag.elementEnd()); assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); } --- 66,71 ---- assertTrue("Node should be a script tag",node[0] instanceof ScriptTag); ScriptTag scriptTag = (ScriptTag)node[0]; ! assertEquals("Script Tag Begin",0,scriptTag.getStartPosition ()); ! assertEquals("Script Tag End",28,scriptTag.getEndPosition ()); assertEquals("Script Tag Code","Script Code",scriptTag.getScriptCode()); } |
From: <der...@us...> - 2003-11-01 01:37:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv16662/lexer/nodes Modified Files: RemarkNode.java StringNode.java TagNode.java Log Message: Remove deprecated method calls: elementBegin()->getStartPosition() elementEnd()->getEndPosition() Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** RemarkNode.java 26 Oct 2003 19:46:18 -0000 1.10 --- RemarkNode.java 1 Nov 2003 01:36:56 -0000 1.11 *************** *** 62,67 **** String ret; ! start = elementBegin () + 4; ! end = elementEnd () - 3; if (start >= end) ret = ""; --- 62,67 ---- String ret; ! start = getStartPosition () + 4; ! end = getEndPosition () - 3; if (start >= end) ret = ""; *************** *** 77,82 **** } ! public String toHtml() { ! return (mPage.getText (elementBegin (), elementEnd ())); } /** --- 77,83 ---- } ! public String toHtml() ! { ! return (mPage.getText (getStartPosition (), getEndPosition ())); } /** *************** *** 88,93 **** Cursor end; ! start = new Cursor (getPage (), elementBegin ()); ! end = new Cursor (getPage (), elementEnd ()); return ("Rem (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 89,94 ---- Cursor end; ! start = new Cursor (getPage (), getStartPosition ()); ! 
end = new Cursor (getPage (), getEndPosition ()); return ("Rem (" + start.toString () + "," + end.toString () + "): " + getText ()); } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** StringNode.java 26 Oct 2003 19:46:18 -0000 1.11 --- StringNode.java 1 Nov 2003 01:36:56 -0000 1.12 *************** *** 90,94 **** public String toHtml () { ! return (mPage.getText (elementBegin (), elementEnd ())); } --- 90,94 ---- public String toHtml () { ! return (mPage.getText (getStartPosition (), getEndPosition ())); } *************** *** 98,103 **** Cursor end; ! start = new Cursor (getPage (), elementBegin ()); ! end = new Cursor (getPage (), elementEnd ()); return ("Txt (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 98,103 ---- Cursor end; ! start = new Cursor (getPage (), getStartPosition ()); ! end = new Cursor (getPage (), getEndPosition ()); return ("Txt (" + start.toString () + "," + end.toString () + "): " + getText ()); } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** TagNode.java 27 Oct 2003 02:18:04 -0000 1.20 --- TagNode.java 1 Nov 2003 01:36:56 -0000 1.21 *************** *** 577,582 **** output = (TagNode)lexer.nextNode (); mPage = output.getPage (); ! nodeBegin = output.elementBegin (); ! nodeEnd = output.elementEnd (); mAttributes = output.getAttributesEx (); } --- 577,582 ---- output = (TagNode)lexer.nextNode (); mPage = output.getPage (); ! nodeBegin = output.getStartPosition (); ! nodeEnd = output.getEndPosition (); mAttributes = output.getAttributesEx (); } *************** *** 642,647 **** else type = "Tag"; ! 
start = new Cursor (getPage (), elementBegin ()); ! end = new Cursor (getPage (), elementEnd ()); return (type + " (" + start.toString () + "," + end.toString () + "): " + getText ()); } --- 642,647 ---- else type = "Tag"; ! start = new Cursor (getPage (), getStartPosition ()); ! end = new Cursor (getPage (), getEndPosition ()); return (type + " (" + start.toString () + "," + end.toString () + "): " + getText ()); } |
From: <der...@us...> - 2003-11-01 01:37:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv16662/scanners Modified Files: CompositeTagScanner.java ScriptScanner.java TagScanner.java Log Message: Remove deprecated method calls: elementBegin()->getStartPosition() elementEnd()->getEndPosition() Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.77 retrieving revision 1.78 diff -C2 -d -r1.77 -r1.78 *** CompositeTagScanner.java 31 Oct 2003 12:56:08 -0000 1.77 --- CompositeTagScanner.java 1 Nov 2003 01:36:57 -0000 1.78 *************** *** 220,225 **** { // insert a virtual end tag and backup one node ! endTag = createVirtualEndTag (tag, lexer.getPage (), next.elementBegin ()); ! lexer.setPosition (next.elementBegin ()); node = null; } --- 220,225 ---- { // insert a virtual end tag and backup one node ! endTag = createVirtualEndTag (tag, lexer.getPage (), next.getStartPosition ()); ! lexer.setPosition (next.getStartPosition ()); node = null; } *************** *** 245,249 **** endTag = createVirtualEndTag (tag, lexer.getPage (), lexer.getCursor ().getPosition ()); ! composite = (CompositeTag)createTag (lexer.getPage (), tag.elementBegin (), endTag.elementEnd (), tag.getAttributesEx (), tag, endTag, nodeList); composite.setThisScanner (this); for (int i = 0; i < composite.getChildCount (); i++) --- 245,249 ---- endTag = createVirtualEndTag (tag, lexer.getPage (), lexer.getCursor ().getPosition ()); ! 
composite = (CompositeTag)createTag (lexer.getPage (), tag.getStartPosition (), endTag.getEndPosition (), tag.getAttributesEx (), tag, endTag, nodeList); composite.setThisScanner (this); for (int i = 0; i < composite.getChildCount (); i++) Index: ScriptScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** ScriptScanner.java 31 Oct 2003 12:56:08 -0000 1.47 --- ScriptScanner.java 1 Nov 2003 01:36:57 -0000 1.48 *************** *** 164,168 **** end = new Tag (lexer.getPage (), tag.getEndPosition (), tag.getEndPosition (), new Vector ()); //TODO: use the factory: ! ret = createTag (lexer.getPage (), tag.elementBegin(), end.elementEnd(), tag.getAttributesEx (), tag, end, new NodeList (last)); ret.setThisScanner (this); } --- 164,168 ---- end = new Tag (lexer.getPage (), tag.getEndPosition (), tag.getEndPosition (), new Vector ()); //TODO: use the factory: ! ret = createTag (lexer.getPage (), tag.getStartPosition (), end.getEndPosition (), tag.getAttributesEx (), tag, end, new NodeList (last)); ret.setThisScanner (this); } Index: TagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** TagScanner.java 28 Oct 2003 12:54:21 -0000 1.46 --- TagScanner.java 1 Nov 2003 01:36:57 -0000 1.47 *************** *** 134,138 **** Tag ret; ! ret = createTag(lexer.getPage (), tag.elementBegin(), tag.elementEnd(), tag.getAttributesEx (), tag, url); ret.setThisScanner(this); --- 134,138 ---- Tag ret; ! ret = createTag(lexer.getPage (), tag.getStartPosition (), tag.getEndPosition (), tag.getAttributesEx (), tag, url); ret.setThisScanner(this); |
From: <der...@us...> - 2003-11-01 01:37:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv16662/tests/scannersTests Modified Files: CompositeTagScannerTest.java Log Message: Remove deprecated method calls: elementBegin()->getStartPosition() elementEnd()->getEndPosition() Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.48 retrieving revision 1.49 diff -C2 -d -r1.48 -r1.49 *** CompositeTagScannerTest.java 31 Oct 2003 12:56:09 -0000 1.48 --- CompositeTagScannerTest.java 1 Nov 2003 01:36:57 -0000 1.49 *************** *** 84,89 **** assertEquals("child count",0,customTag.getChildCount()); assertTrue("custom tag should be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",9,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 84,89 ---- assertEquals("child count",0,customTag.getChildCount()); assertTrue("custom tag should be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",9,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 97,102 **** assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! 
assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 97,102 ---- assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 113,118 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 113,118 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 132,139 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("custom tag starting loc",0,customTag.elementBegin()); ! 
assertEquals("custom tag ending loc",24,customTag.elementEnd()); Node child = customTag.childAt(0); --- 132,139 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); ! assertEquals("custom tag starting loc",0,customTag.getStartPosition ()); ! assertEquals("custom tag ending loc",24,customTag.getEndPosition ()); Node child = customTag.childAt(0); *************** *** 153,168 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("custom tag starting loc",0,customTag.elementBegin()); ! assertEquals("custom tag ending loc",27,customTag.elementEnd()); Node child = customTag.childAt(0); assertType("child",AnotherTag.class,child); AnotherTag tag = (AnotherTag)child; ! assertEquals("another tag start pos",8,tag.elementBegin()); ! assertEquals("another tag ending pos",18,tag.elementEnd()); ! assertEquals("custom end tag start pos",18,customTag.getEndTag().elementBegin()); assertStringEquals("child html",childtag,child.toHtml()); } --- 153,168 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); ! assertEquals("custom tag starting loc",0,customTag.getStartPosition ()); ! assertEquals("custom tag ending loc",27,customTag.getEndPosition ()); Node child = customTag.childAt(0); assertType("child",AnotherTag.class,child); AnotherTag tag = (AnotherTag)child; ! 
assertEquals("another tag start pos",8,tag.getStartPosition ()); ! assertEquals("another tag ending pos",18,tag.getEndPosition ()); ! assertEquals("custom end tag start pos",18,customTag.getEndTag().getStartPosition ()); assertStringEquals("child html",childtag,child.toHtml()); } *************** *** 262,267 **** assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 262,267 ---- assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 275,280 **** assertEquals("child count",2,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 275,280 ---- assertEquals("child count",2,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! 
assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 288,293 **** assertEquals("child count",3,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",1,customTag.getEndingLineNumber()); --- 288,293 ---- assertEquals("child count",3,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",1,customTag.getEndingLineNumber()); *************** *** 305,311 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); ! assertEquals("ending loc of custom tag",17,customTag.elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 305,311 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); ! 
assertEquals("ending loc of custom tag",17,customTag.getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 331,336 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",9,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",17,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",1,customTag.getEndingLineNumber()); --- 331,336 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",9,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",17,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",1,customTag.getEndingLineNumber()); *************** *** 349,356 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); ! assertEquals("another tag ending loc",17,anotherTag.elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 349,356 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! 
assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); ! assertEquals("another tag ending loc",17,anotherTag.getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 371,376 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); --- 371,376 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); assertEquals("starting line position",0,customTag.getStartingLineNumber()); assertEquals("ending line position",0,customTag.getEndingLineNumber()); *************** *** 402,412 **** assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().elementBegin()); ! assertEquals("ending loc",8,customTag.getStartTag().elementEnd()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); assertEquals("anotherTag child count",2,anotherTag.getChildCount()); ! assertEquals("anotherTag end loc",27,anotherTag.elementEnd()); ! assertEquals("custom end tag begin loc",27,customTag.getEndTag().elementBegin()); ! 
assertEquals("custom end tag end loc",36,customTag.getEndTag().elementEnd()); } --- 402,412 ---- assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertEquals("starting loc",0,customTag.getStartTag().getStartPosition ()); ! assertEquals("ending loc",8,customTag.getStartTag().getEndPosition ()); AnotherTag anotherTag = (AnotherTag)customTag.childAt(0); assertEquals("anotherTag child count",2,anotherTag.getChildCount()); ! assertEquals("anotherTag end loc",27,anotherTag.getEndPosition ()); ! assertEquals("custom end tag begin loc",27,customTag.getEndTag().getStartPosition ()); ! assertEquals("custom end tag end loc",36,customTag.getEndTag().getEndPosition ()); } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv6096/scanners Modified Files: AppletScanner.java BodyScanner.java BulletListScanner.java BulletScanner.java CompositeTagScanner.java DivScanner.java FormScanner.java FrameSetScanner.java HeadScanner.java HtmlScanner.java LabelScanner.java LinkScanner.java OptionTagScanner.java ScriptScanner.java SelectTagScanner.java SpanScanner.java StyleScanner.java TableColumnScanner.java TableRowScanner.java TableScanner.java TextareaTagScanner.java TitleScanner.java Log Message: Remove unused nameOfTagToMatch member in CompositeTag. This is the job of getID() on superclass TagScanner. Index: AppletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/AppletScanner.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** AppletScanner.java 26 Oct 2003 19:46:19 -0000 1.34 --- AppletScanner.java 31 Oct 2003 12:56:08 -0000 1.35 *************** *** 45,53 **** public AppletScanner() { - super(MATCH_STRING); } public AppletScanner(String filter) { ! super(filter,MATCH_STRING); } --- 45,52 ---- public AppletScanner() { } public AppletScanner(String filter) { ! super(filter); } Index: BodyScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BodyScanner.java,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** BodyScanner.java 28 Oct 2003 10:31:02 -0000 1.21 --- BodyScanner.java 31 Oct 2003 12:56:08 -0000 1.22 *************** *** 52,56 **** public BodyScanner(String filter) { ! super(filter,MATCH_NAME,MATCH_NAME,END_TAG_ENDERS); } --- 52,56 ---- public BodyScanner(String filter) { ! 
super(filter,MATCH_NAME,END_TAG_ENDERS); } Index: BulletListScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletListScanner.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** BulletListScanner.java 28 Oct 2003 03:04:18 -0000 1.20 --- BulletListScanner.java 31 Oct 2003 12:56:08 -0000 1.21 *************** *** 54,58 **** public BulletListScanner(String filter, Parser parser) { ! super(filter, MATCH_STRING, ENDERS); parser.addScanner(new BulletScanner("-bullet")); } --- 54,58 ---- public BulletListScanner(String filter, Parser parser) { ! super(filter, ENDERS); parser.addScanner(new BulletScanner("-bullet")); } Index: BulletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** BulletScanner.java 28 Oct 2003 10:31:02 -0000 1.26 --- BulletScanner.java 31 Oct 2003 12:56:08 -0000 1.27 *************** *** 54,58 **** public BulletScanner(String filter) { ! super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS); } --- 54,58 ---- public BulletScanner(String filter) { ! super(filter, ENDERS, END_TAG_ENDERS); } Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.76 retrieving revision 1.77 diff -C2 -d -r1.76 -r1.77 *** CompositeTagScanner.java 28 Oct 2003 12:54:21 -0000 1.76 --- CompositeTagScanner.java 31 Oct 2003 12:56:08 -0000 1.77 *************** *** 93,131 **** public abstract class CompositeTagScanner extends TagScanner { - protected String [] nameOfTagToMatch; protected Set tagEnderSet; private Set endTagEnderSet; private boolean balance_quotes; ! 
public CompositeTagScanner(String [] nameOfTagToMatch) { ! this(nameOfTagToMatch,new String[] {}); } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) { ! this("",nameOfTagToMatch,tagEnders); } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {}); } public CompositeTagScanner( String filter, - String [] nameOfTagToMatch, String [] tagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}); } public CompositeTagScanner( String filter, - String [] nameOfTagToMatch, String [] tagEnders, String [] endTagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,endTagEnders, false); } --- 93,128 ---- public abstract class CompositeTagScanner extends TagScanner { protected Set tagEnderSet; private Set endTagEnderSet; private boolean balance_quotes; ! public CompositeTagScanner() { ! this(new String[] {}); } ! public CompositeTagScanner(String [] tagEnders) { ! this("",tagEnders); } ! public CompositeTagScanner(String filter) { ! this(filter,new String [] {}); } public CompositeTagScanner( String filter, String [] tagEnders) { ! this(filter,tagEnders,new String[] {}); } public CompositeTagScanner( String filter, String [] tagEnders, String [] endTagEnders) { ! this(filter,tagEnders,endTagEnders, false); } *************** *** 135,139 **** * to pass through. This can be useful when one wishes to dynamically filter * out all tags except one type which may be programmed later than the parser. - * @param nameOfTagToMatch The tag names recognized by this scanner. * @param tagEnders The non-endtag tag names which signal that no closing * end tag was found. 
For example, encountering <FORM> while --- 132,135 ---- *************** *** 153,157 **** public CompositeTagScanner( String filter, - String [] nameOfTagToMatch, String [] tagEnders, String [] endTagEnders, --- 149,152 ---- *************** *** 159,163 **** { super(filter); - this.nameOfTagToMatch = nameOfTagToMatch; this.balance_quotes = balance_quotes; this.tagEnderSet = new HashSet(); --- 154,157 ---- Index: DivScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DivScanner.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** DivScanner.java 26 Oct 2003 19:46:19 -0000 1.32 --- DivScanner.java 31 Oct 2003 12:56:08 -0000 1.33 *************** *** 50,54 **** public DivScanner(String filter) { ! super(filter, MATCH_STRING); } --- 50,54 ---- public DivScanner(String filter) { ! super(filter); } Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** FormScanner.java 29 Oct 2003 03:31:17 -0000 1.52 --- FormScanner.java 31 Oct 2003 12:56:08 -0000 1.53 *************** *** 62,66 **** public FormScanner(String filter, Parser parser) { ! super(filter,MATCH_ID,formTagEnders); parser.addScanner(new InputTagScanner("-i")); parser.addScanner(new TextareaTagScanner("-t")); --- 62,66 ---- public FormScanner(String filter, Parser parser) { ! 
super(filter,formTagEnders); parser.addScanner(new InputTagScanner("-i")); parser.addScanner(new TextareaTagScanner("-t")); Index: FrameSetScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameSetScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** FrameSetScanner.java 26 Oct 2003 19:46:19 -0000 1.30 --- FrameSetScanner.java 31 Oct 2003 12:56:08 -0000 1.31 *************** *** 46,55 **** public FrameSetScanner() { - super(MATCH_NAME); } public FrameSetScanner(String filter) { ! super(filter,MATCH_NAME); } --- 46,54 ---- public FrameSetScanner() { } public FrameSetScanner(String filter) { ! super(filter); } Index: HeadScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HeadScanner.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** HeadScanner.java 28 Oct 2003 10:31:02 -0000 1.18 --- HeadScanner.java 31 Oct 2003 12:56:08 -0000 1.19 *************** *** 55,59 **** public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS); } --- 55,59 ---- public HeadScanner(String filter) { ! super(filter,ENDERS,END_TAG_ENDERS); } Index: HtmlScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HtmlScanner.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** HtmlScanner.java 26 Oct 2003 19:46:19 -0000 1.32 --- HtmlScanner.java 31 Oct 2003 12:56:08 -0000 1.33 *************** *** 47,51 **** public HtmlScanner(String filter) { ! super(filter, MATCH_STRING); } --- 47,51 ---- public HtmlScanner(String filter) { ! 
super(filter); } Index: LabelScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LabelScanner.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** LabelScanner.java 28 Oct 2003 10:31:02 -0000 1.36 --- LabelScanner.java 31 Oct 2003 12:56:08 -0000 1.37 *************** *** 43,51 **** public LabelScanner() { ! super(MATCH_NAME,MATCH_NAME); } public LabelScanner(String filter) { ! super(filter,MATCH_NAME,MATCH_NAME); } --- 43,51 ---- public LabelScanner() { ! super(MATCH_NAME); } public LabelScanner(String filter) { ! super(filter,MATCH_NAME); } Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.59 retrieving revision 1.60 diff -C2 -d -r1.59 -r1.60 *** LinkScanner.java 29 Oct 2003 03:31:17 -0000 1.59 --- LinkScanner.java 31 Oct 2003 12:56:08 -0000 1.60 *************** *** 67,71 **** */ public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS); } --- 67,71 ---- */ public LinkScanner(String filter) { ! super(filter,ENDERS,ENDTAG_ENDERS); } Index: OptionTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/OptionTagScanner.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** OptionTagScanner.java 28 Oct 2003 10:31:02 -0000 1.37 --- OptionTagScanner.java 31 Oct 2003 12:56:08 -0000 1.38 *************** *** 44,48 **** public OptionTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS); } --- 44,48 ---- public OptionTagScanner(String filter) { ! 
super(filter, ENDERS, END_TAG_ENDERS); } Index: ScriptScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ScriptScanner.java,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** ScriptScanner.java 28 Oct 2003 12:54:21 -0000 1.46 --- ScriptScanner.java 31 Oct 2003 12:56:08 -0000 1.47 *************** *** 52,64 **** public ScriptScanner() { ! super("",MATCH_NAME,ENDERS); } public ScriptScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS); ! } ! ! public ScriptScanner(String filter, String[] nameOfTagToMatch) { ! super(filter,nameOfTagToMatch,ENDERS); } --- 52,60 ---- public ScriptScanner() { ! super("",ENDERS); } public ScriptScanner(String filter) { ! super(filter,ENDERS); } Index: SelectTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/SelectTagScanner.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** SelectTagScanner.java 28 Oct 2003 10:31:02 -0000 1.35 --- SelectTagScanner.java 31 Oct 2003 12:56:08 -0000 1.36 *************** *** 48,52 **** public SelectTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS); } --- 48,52 ---- public SelectTagScanner(String filter) { ! super(filter, ENDERS, END_TAG_ENDERS); } Index: SpanScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/SpanScanner.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** SpanScanner.java 26 Oct 2003 19:46:21 -0000 1.34 --- SpanScanner.java 31 Oct 2003 12:56:08 -0000 1.35 *************** *** 44,48 **** public SpanScanner(String filter) { ! super(filter, MATCH_ID); } --- 44,48 ---- public SpanScanner(String filter) { ! 
super(filter); } Index: StyleScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/StyleScanner.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** StyleScanner.java 26 Oct 2003 19:46:21 -0000 1.29 --- StyleScanner.java 31 Oct 2003 12:56:08 -0000 1.30 *************** *** 47,55 **** public StyleScanner() { - super(MATCH_NAME); } public StyleScanner(String filter) { ! super(filter,MATCH_NAME); } --- 47,54 ---- public StyleScanner() { } public StyleScanner(String filter) { ! super(filter); } Index: TableColumnScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TableColumnScanner.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** TableColumnScanner.java 28 Oct 2003 10:31:02 -0000 1.38 --- TableColumnScanner.java 31 Oct 2003 12:56:08 -0000 1.39 *************** *** 44,58 **** public TableColumnScanner(String filter) { ! this(filter, MATCH_STRING, MATCH_STRING, new String[] {}); } public TableColumnScanner( String filter, - String[] nameOfTagToMatch, String [] tagEnders, String [] endTagEnders) { super( filter, - nameOfTagToMatch, tagEnders, endTagEnders); --- 44,56 ---- public TableColumnScanner(String filter) { ! 
this(filter, MATCH_STRING, new String[] {}); } public TableColumnScanner( String filter, String [] tagEnders, String [] endTagEnders) { super( filter, tagEnders, endTagEnders); Index: TableRowScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TableRowScanner.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** TableRowScanner.java 28 Oct 2003 10:31:02 -0000 1.41 --- TableRowScanner.java 31 Oct 2003 12:56:08 -0000 1.42 *************** *** 46,50 **** public TableRowScanner(String filter,Parser parser) { ! this(filter, parser, MATCH_STRING, MATCH_STRING, new String[] {}); } --- 46,50 ---- public TableRowScanner(String filter,Parser parser) { ! this(filter, parser, MATCH_STRING, new String[] {}); } *************** *** 52,61 **** String filter, Parser parser, - String[] nameOfTagToMatch, String [] tagEnders, String [] endTagEnders) { super( filter, - nameOfTagToMatch, tagEnders, endTagEnders); --- 52,59 ---- Index: TableScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TableScanner.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** TableScanner.java 28 Oct 2003 10:31:02 -0000 1.40 --- TableScanner.java 31 Oct 2003 12:56:08 -0000 1.41 *************** *** 48,52 **** public TableScanner(Parser parser,String filter) { ! super(filter, MATCH_STRING, ENDERS, ENDTAG_ENDERS); parser.addScanner(new TableRowScanner(parser)); } --- 48,52 ---- public TableScanner(Parser parser,String filter) { ! 
super(filter, ENDERS, ENDTAG_ENDERS); parser.addScanner(new TableRowScanner(parser)); } Index: TextareaTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TextareaTagScanner.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** TextareaTagScanner.java 28 Oct 2003 10:31:02 -0000 1.32 --- TextareaTagScanner.java 31 Oct 2003 12:56:08 -0000 1.33 *************** *** 45,49 **** public TextareaTagScanner(String filter) { ! super(filter, MATCH_NAME, ENDERS, END_TAG_ENDERS); } --- 45,49 ---- public TextareaTagScanner(String filter) { ! super(filter, ENDERS, END_TAG_ENDERS); } Index: TitleScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TitleScanner.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** TitleScanner.java 28 Oct 2003 10:31:02 -0000 1.34 --- TitleScanner.java 31 Oct 2003 12:56:09 -0000 1.35 *************** *** 46,50 **** public TitleScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS); } --- 46,50 ---- public TitleScanner(String filter) { ! super(filter,ENDERS,END_TAG_ENDERS); } |
From: <der...@us...> - 2003-10-31 12:56:12
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv6096/tests/scannersTests Modified Files: CompositeTagScannerTest.java Log Message: Remove unused nameOfTagToMatch member in CompositeTag. This is the job of getID() on superclass TagScanner. Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** CompositeTagScannerTest.java 28 Oct 2003 10:31:02 -0000 1.47 --- CompositeTagScannerTest.java 31 Oct 2003 12:56:09 -0000 1.48 *************** *** 54,62 **** protected void setUp() { - String [] arr = { - "SOMETHING" - }; scanner = ! new CompositeTagScanner(arr) { public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException { --- 54,62 ---- protected void setUp() { scanner = ! new CompositeTagScanner() { ! String [] arr = { ! "SOMETHING" ! }; public Tag createTag(Page page, int start, int end, Vector attributes, Tag startTag, Tag endTag, NodeList children) throws ParserException { *************** *** 64,68 **** } public String[] getID() { ! return null; } --- 64,68 ---- } public String[] getID() { ! return arr; } *************** *** 559,563 **** public CustomScanner(boolean selfChildrenAllowed) { ! super("", MATCH_NAME, selfChildrenAllowed ? new String[] {} : MATCH_NAME); } --- 559,563 ---- public CustomScanner(boolean selfChildrenAllowed) { ! super("", selfChildrenAllowed ? new String[] {} : MATCH_NAME); } *************** *** 586,594 **** private static final String MATCH_NAME [] = { "ANOTHER" }; public AnotherScanner() { ! super("", MATCH_NAME, new String[] {"CUSTOM"}); } public AnotherScanner(boolean acceptCustomTagsButDontAcceptCustomEndTags) { ! 
super("", MATCH_NAME, new String[] {}, new String[] {"CUSTOM"}); } --- 586,594 ---- private static final String MATCH_NAME [] = { "ANOTHER" }; public AnotherScanner() { ! super("", new String[] {"CUSTOM"}); } public AnotherScanner(boolean acceptCustomTagsButDontAcceptCustomEndTags) { ! super("", new String[] {}, new String[] {"CUSTOM"}); } |
From: <der...@us...> - 2003-10-29 03:31:30
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv12839/src/org/htmlparser/tags Modified Files: BaseHrefTag.java FormTag.java ImageTag.java LinkTag.java Log Message: Move LinkProcess out of scanners and into Page, untangling A, IMG and BASE scanners. Move form action determination to tag. The scanners have no special actions on behalf of tags anymore. Index: BaseHrefTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BaseHrefTag.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** BaseHrefTag.java 26 Oct 2003 19:46:22 -0000 1.28 --- BaseHrefTag.java 29 Oct 2003 03:31:17 -0000 1.29 *************** *** 30,33 **** --- 30,34 ---- package org.htmlparser.tags; + import java.util.Vector; import org.htmlparser.util.LinkProcessor; *************** *** 69,72 **** --- 70,85 ---- "--------\n"+ "Name : "+getBaseUrl(); + } + + /** + * Override this because we need a trigger to set the base HREF on the page. + * NOTE: setting of the attributes is the last thing done on the tag + * after creation. + * @param attribs The new BASE tag attributes. 
+ */ + public void setAttributesEx (Vector attribs) + { + super.setAttributesEx (attribs); + getPage ().getLinkProcessor ().setBaseUrl (getBaseUrl ()); } } Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** FormTag.java 26 Oct 2003 19:46:23 -0000 1.36 --- FormTag.java 29 Oct 2003 03:31:17 -0000 1.37 *************** *** 31,34 **** --- 31,35 ---- import org.htmlparser.util.NodeList; + import org.htmlparser.util.ParserException; import org.htmlparser.util.SimpleNodeIterator; *************** *** 161,164 **** --- 162,200 ---- { return "FORM TAG : Form at "+getFormLocation()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); + } + + /** + * Extract the location of the image, given the tag, and the url + * of the html page in which this tag exists. + * @param tag The form tag with the 'ACTION' attribute. + * @param url URL of web page being parsed. + */ + public String extractFormLocn(String url)// throws ParserException + { + String formURL; + + formURL = getAttribute("ACTION"); + if (null == formURL) + return ""; + else + return (getPage ().getLinkProcessor ().extract (formURL, url)); + } + + /** + * Override this because we need a trigger to set the ACTION attribute. + * NOTE: setting of the children is the last thing done on the tag + * after creation. + * @param children The new list of children this node contains. + */ + public void setChildren (NodeList children) + { + String url; + + super.setChildren (children); + + // ... is it true that without an ACTION the default is to send it back to the same page? 
+ url = extractFormLocn(getPage ().getUrl ()); + if (null != url && 0 < url.length()) + setAttribute ("ACTION",url); } } Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** ImageTag.java 26 Oct 2003 19:46:24 -0000 1.31 --- ImageTag.java 29 Oct 2003 03:31:17 -0000 1.32 *************** *** 195,197 **** --- 195,212 ---- } + /** + * Override this because we need a trigger to set the image URL. + * Need to update the imageURL string in the this tag, + * but not the SRC attribute so toHtml() outputs the right thing. + * NOTE: setting of the attributes is the last thing done on the tag + * after creation. + * @param attribs The new IMG tag attributes. + */ + public void setAttributesEx (Vector attribs) + { + String src; + + super.setAttributesEx (attribs); + imageURL = getPage ().getLinkProcessor ().extract (getImageURL (), getPage().getUrl ()); + } } Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** LinkTag.java 26 Oct 2003 19:46:24 -0000 1.38 --- LinkTag.java 29 Oct 2003 03:31:17 -0000 1.39 *************** *** 32,36 **** import org.htmlparser.Node; import org.htmlparser.scanners.LinkScanner; - import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserUtils; import org.htmlparser.util.SimpleNodeIterator; --- 32,35 ---- *************** *** 307,312 **** relativeLink = ParserUtils.removeChars(relativeLink,'\r'); } ! LinkProcessor processor = ((LinkScanner)getThisScanner ()).processor; ! return (processor.extract(relativeLink,getPage ().getUrl ())); } } --- 306,310 ---- relativeLink = ParserUtils.removeChars(relativeLink,'\r'); } ! 
return (getPage ().getLinkProcessor ().extract (relativeLink, getPage ().getUrl ())); } } |