htmlparser-cvs Mailing List for HTML Parser (Page 46)
Brought to you by:
derrickoswald
You can subscribe to this list here.
| 2003 | Jan | Feb | Mar | Apr | May (141) | Jun (108) | Jul (66) | Aug (127) | Sep (155) | Oct (149) | Nov (72) | Dec (72) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2004 | Jan (100) | Feb (36) | Mar (21) | Apr (3) | May (87) | Jun (28) | Jul (84) | Aug (5) | Sep (14) | Oct | Nov | Dec |
| 2005 | Jan (1) | Feb (39) | Mar (26) | Apr (38) | May (14) | Jun (10) | Jul | Aug | Sep (13) | Oct (8) | Nov (10) | Dec |
| 2006 | Jan | Feb (1) | Mar (17) | Apr (20) | May (28) | Jun (24) | Jul | Aug | Sep | Oct | Nov | Dec |
| 2015 | Jan | Feb | Mar (1) | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
From: <so...@us...> - 2003-08-24 18:43:09
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv15802/src/org/htmlparser/tests/scannersTests Modified Files: ImageScannerTest.java Log Message: removed unused local variables Index: ImageScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/ImageScannerTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** ImageScannerTest.java 11 Aug 2003 00:18:32 -0000 1.25 --- ImageScannerTest.java 24 Aug 2003 18:43:06 -0000 1.26 *************** *** 74,78 **** { Tag tag = new Tag(new TagData(0,0,"img width=638 height=53 border=0 usemap=\"#m\" src=http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif alt=Yahoo","")); - String link = "img width=638 height=53 border=0 usemap=\"#m\" src=http://us.a1.yimg.com/us.yimg.com/i/ww/m5v5.gif alt=Yahoo"; String url = "c:\\cvs\\html\\binaries\\yahoo.htm"; ImageScanner scanner = new ImageScanner("-i",new LinkProcessor()); --- 74,77 ---- |
From: <so...@us...> - 2003-08-24 18:42:35
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv15672/src/org/htmlparser/tests/scannersTests Modified Files: HeadScannerTest.java Log Message: removed unused local variables Index: HeadScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/HeadScannerTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** HeadScannerTest.java 11 Aug 2003 00:18:32 -0000 1.11 --- HeadScannerTest.java 24 Aug 2003 18:42:30 -0000 1.12 *************** *** 33,37 **** import junit.framework.TestSuite; - import org.htmlparser.scanners.*; import org.htmlparser.tags.*; import org.htmlparser.tests.ParserTestCase; --- 33,36 ---- *************** *** 46,50 **** public void testSimpleHead() throws ParserException { createParser("<HTML><HEAD></HEAD></HTML>"); - HeadScanner headScanner = new HeadScanner(); parser.registerDomScanners(); parseAndAssertNodeCount(1); --- 45,48 ---- *************** *** 56,60 **** public void testSimpleHeadWithoutEndTag() throws ParserException { createParser("<HTML><HEAD></HTML>"); - HeadScanner headScanner = new HeadScanner(); parser.registerDomScanners(); parseAndAssertNodeCount(1); --- 54,57 ---- *************** *** 69,73 **** public void testSimpleHeadWithBody() throws ParserException { createParser("<HTML><HEAD><BODY></HTML>"); - HeadScanner headScanner = new HeadScanner(); parser.registerDomScanners(); parseAndAssertNodeCount(1); --- 66,69 ---- |
From: <so...@us...> - 2003-08-24 18:41:39
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv15480/src/org/htmlparser/tests/scannersTests Modified Files: CompositeTagScannerTest.java Log Message: removed unused local variables Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** CompositeTagScannerTest.java 23 Aug 2003 17:14:45 -0000 1.33 --- CompositeTagScannerTest.java 24 Aug 2003 18:41:36 -0000 1.34 *************** *** 77,81 **** ); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",0,customTag.getChildCount()); assertTrue("custom tag should be xml end tag",customTag.isEmptyXmlTag()); --- 77,80 ---- *************** *** 92,96 **** ); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 91,94 ---- *************** *** 109,113 **** ); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 107,110 ---- *************** *** 119,123 **** Node child = customTag.childAt(0); assertType("child",StringNode.class,child); - StringNode text = (StringNode)child; assertStringEquals("child text","Hello",child.toPlainTextString()); } --- 116,119 ---- *************** *** 130,134 **** ); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 126,129 ---- *************** *** 140,144 **** 
Node child = customTag.childAt(0); assertType("child",Tag.class,child); - Tag tag = (Tag)child; assertStringEquals("child html","<HELLO>",child.toHtml()); } --- 135,138 ---- *************** *** 152,156 **** parser.addScanner(new AnotherScanner()); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 146,149 ---- *************** *** 261,265 **** createParser("<custom>"); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); --- 254,257 ---- *************** *** 278,282 **** ); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",2,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); --- 270,273 ---- *************** *** 295,299 **** ); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",2,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 286,289 ---- *************** *** 319,323 **** parseAndAssertNodeCount(2); CustomTag customTag = (CustomTag)node[0]; - int x = customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 309,312 ---- *************** *** 353,357 **** CustomTag customTag = (CustomTag)node[1]; - int x = customTag.getChildCount(); assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 342,345 ---- *************** *** 372,376 **** parser.addScanner(new AnotherScanner(true)); CustomTag customTag = parseCustomTag(1); - int x = 
customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should be xml end tag",customTag.isEmptyXmlTag()); --- 360,363 ---- *************** *** 395,399 **** parser.addScanner(new AnotherScanner(true)); CustomTag customTag = parseCustomTag(2); - int x = customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 382,385 ---- *************** *** 427,431 **** parser.addScanner(new AnotherScanner(true)); CustomTag customTag = parseCustomTag(1); - int x = customTag.getChildCount(); assertEquals("child count",1,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 413,416 ---- *************** *** 450,454 **** CustomTag customTag = (CustomTag)node[0]; - int x = customTag.getChildCount(); assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); --- 435,438 ---- |
From: <so...@us...> - 2003-08-24 18:39:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv15131/src/org/htmlparser/tests Modified Files: ParserTest.java ParserTestCase.java Log Message: removed unused local variables Index: ParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** ParserTest.java 11 Aug 2003 00:18:31 -0000 1.36 --- ParserTest.java 24 Aug 2003 18:39:11 -0000 1.37 *************** *** 613,617 **** ); parser.registerScanners(); - int i = 0; NodeList collectionList = new NodeList(); --- 613,616 ---- *************** *** 670,674 **** "</html>"); parser.registerScanners(); - int i = 0; NodeList collectionList = new NodeList(); --- 669,672 ---- *************** *** 712,718 **** + "</table>\n" + "</body></html>\n"); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! Node node = e.nextNode(); } } --- 710,715 ---- + "</table>\n" + "</body></html>\n"); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! e.nextNode(); } } Index: ParserTestCase.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** ParserTestCase.java 15 Aug 2003 20:51:48 -0000 1.22 --- ParserTestCase.java 24 Aug 2003 18:39:11 -0000 1.23 *************** *** 335,339 **** } - String expectedHTML = expectedTag.toHtml(); if (expectedValue==null) fail( --- 335,338 ---- *************** *** 362,366 **** } - String expectedHTML = expectedTag.toHtml(); assertStringEquals( "\nvalue for key "+key+" in tag "+expectedTag.getTagName()+" expected="+expectedValue+" but was "+actualValue+ --- 361,364 ---- |
From: <so...@us...> - 2003-08-24 18:39:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1:/tmp/cvs-serv15131/src/org/htmlparser/tests/utilTests Modified Files: BeanTest.java Log Message: removed unused local variables Index: BeanTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/BeanTest.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** BeanTest.java 11 Aug 2003 00:18:34 -0000 1.35 --- BeanTest.java 24 Aug 2003 18:39:11 -0000 1.36 *************** *** 48,55 **** import org.htmlparser.beans.LinkBean; import org.htmlparser.beans.StringBean; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.ParserException; ! public class BeanTest extends TestCase { public BeanTest (String name) --- 48,56 ---- import org.htmlparser.beans.LinkBean; import org.htmlparser.beans.StringBean; + import org.htmlparser.tests.*; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.ParserException; ! public class BeanTest extends ParserTestCase { public BeanTest (String name) *************** *** 123,127 **** file.delete (); } ! assertEquals ("stringbean text differs", text, string); } --- 124,128 ---- file.delete (); } ! assertStringEquals ("stringbean text differs", text, string); } |
From: <so...@us...> - 2003-08-24 18:35:14
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv14474/src/org/htmlparser/tests Modified Files: BadTagIdentifier.java Log Message: improved identify() Index: BadTagIdentifier.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/BadTagIdentifier.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** BadTagIdentifier.java 11 Aug 2003 00:18:31 -0000 1.7 --- BadTagIdentifier.java 24 Aug 2003 18:35:12 -0000 1.8 *************** *** 45,49 **** } ! private void identify(String string) throws Exception{ String [] tagsBeingChecked = --- 45,49 ---- } ! private void identify(String url) throws Exception{ String [] tagsBeingChecked = *************** *** 51,55 **** Parser parser = ! new Parser("http://www.amazon.com"); TagFindingVisitor tagFinder = new TagFindingVisitor(tagsBeingChecked, true); --- 51,55 ---- Parser parser = ! new Parser(url); TagFindingVisitor tagFinder = new TagFindingVisitor(tagsBeingChecked, true); |
From: <so...@us...> - 2003-08-24 18:34:29
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv14348/src/org/htmlparser/scanners Modified Files: TagScanner.java Log Message: removed unused local variables Index: TagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/TagScanner.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** TagScanner.java 15 Aug 2003 20:51:48 -0000 1.33 --- TagScanner.java 24 Aug 2003 18:34:25 -0000 1.34 *************** *** 138,142 **** * scan has begun, and hence allows us to write scanners that can work with dirty html */ ! public boolean evaluate(String s,TagScanner previousOpenScanner) { return true; } --- 138,142 ---- * scan has begun, and hence allows us to write scanners that can work with dirty html */ ! public boolean evaluate(String tagContents,TagScanner previousOpenScanner) { return true; } *************** *** 227,231 **** public String removeChars(String s,String occur) { StringBuffer newString = new StringBuffer(); - char ch; int index; do { --- 227,230 ---- *************** *** 280,284 **** * @throws ParserException */ ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { return null; } --- 279,285 ---- * @throws ParserException */ ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! return null; ! } |
From: <so...@us...> - 2003-08-24 18:34:20
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv14317/src/org/htmlparser/scanners Modified Files: FrameScanner.java Log Message: reformatted Index: FrameScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameScanner.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** FrameScanner.java 11 Aug 2003 00:18:30 -0000 1.22 --- FrameScanner.java 24 Aug 2003 18:34:16 -0000 1.23 *************** *** 86,91 **** ! public String extractFrameName(Tag tag,String url) ! { return tag.getAttribute("NAME"); } --- 86,90 ---- ! public String extractFrameName(Tag tag,String url) { return tag.getAttribute("NAME"); } |
From: <so...@us...> - 2003-08-24 18:32:41
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1:/tmp/cvs-serv14037/src/org/htmlparser/nodeDecorators Modified Files: AbstractNodeDecorator.java Log Message: removed unused imports Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** AbstractNodeDecorator.java 23 Aug 2003 17:14:45 -0000 1.5 --- AbstractNodeDecorator.java 24 Aug 2003 18:32:38 -0000 1.6 *************** *** 32,39 **** import org.htmlparser.Node; - import org.htmlparser.tags.CompositeTag; import org.htmlparser.util.NodeList; - import org.htmlparser.visitors.NodeVisitor; - public abstract class AbstractNodeDecorator implements Node { --- 32,36 ---- |
From: <so...@us...> - 2003-08-24 18:32:16
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv13947/src/org/htmlparser/lexer/nodes Modified Files: TagNode.java Log Message: removed unused local variables Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** TagNode.java 23 Aug 2003 17:14:45 -0000 1.3 --- TagNode.java 24 Aug 2003 18:32:10 -0000 1.4 *************** *** 336,340 **** Vector attributes; Attribute attribute; - String value; ret = new StringBuffer (); --- 336,339 ---- |
From: <so...@us...> - 2003-08-24 18:31:48
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv13858/src/org/htmlparser/lexer Modified Files: Page.java Log Message: removed unused imports and variables Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** Page.java 23 Aug 2003 17:14:45 -0000 1.8 --- Page.java 24 Aug 2003 18:31:44 -0000 1.9 *************** *** 33,49 **** package org.htmlparser.lexer; ! import java.io.ByteArrayInputStream; ! import java.io.IOException; ! import java.io.InputStream; ! import java.io.InputStreamReader; ! import java.io.Reader; ! import java.io.UnsupportedEncodingException; ! import java.lang.reflect.InvocationTargetException; ! import java.lang.reflect.Method; ! import java.net.URLConnection; ! import java.net.UnknownHostException; ! import java.util.Random; ! import org.htmlparser.util.ParserException; /** --- 33,41 ---- package org.htmlparser.lexer; ! import java.io.*; ! import java.lang.reflect.*; ! import java.net.*; ! import org.htmlparser.util.*; /** *************** *** 150,154 **** { InputStream stream; - Page ret; if (null == text) --- 142,145 ---- |
From: <so...@us...> - 2003-08-24 18:30:56
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv13709/src/org/htmlparser/lexer Modified Files: Lexer.java Log Message: removed unused imports Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Lexer.java 23 Aug 2003 17:14:44 -0000 1.4 --- Lexer.java 24 Aug 2003 18:30:53 -0000 1.5 *************** *** 33,49 **** package org.htmlparser.lexer; ! import java.io.IOException; ! import java.io.UnsupportedEncodingException; ! import java.net.URL; ! import java.net.URLConnection; ! import java.util.Vector; ! import org.htmlparser.Node; ! import org.htmlparser.lexer.Stream; ! import org.htmlparser.lexer.nodes.Attribute; import org.htmlparser.lexer.nodes.RemarkNode; import org.htmlparser.lexer.nodes.StringNode; ! import org.htmlparser.lexer.nodes.TagNode; ! import org.htmlparser.util.ParserException; /** --- 33,45 ---- package org.htmlparser.lexer; ! import java.io.*; ! import java.net.*; ! import java.util.*; ! import org.htmlparser.*; ! import org.htmlparser.lexer.nodes.*; import org.htmlparser.lexer.nodes.RemarkNode; import org.htmlparser.lexer.nodes.StringNode; ! import org.htmlparser.util.*; /** |
From: <so...@us...> - 2003-08-24 18:30:17
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans In directory sc8-pr-cvs1:/tmp/cvs-serv13639/src/org/htmlparser/beans Modified Files: LinkBean.java StringBean.java Log Message: removed unused private variables Index: LinkBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** LinkBean.java 11 Aug 2003 00:18:28 -0000 1.15 --- LinkBean.java 24 Aug 2003 18:30:14 -0000 1.16 *************** *** 91,95 **** Parser parser; Vector vector; - Node node; LinkTag link; URL[] ret; --- 91,94 ---- Index: StringBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/StringBean.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** StringBean.java 11 Aug 2003 00:18:28 -0000 1.20 --- StringBean.java 24 Aug 2003 18:30:14 -0000 1.21 *************** *** 536,540 **** String url; URLConnection conn; - boolean change; url = getURL (); --- 536,539 ---- *************** *** 614,617 **** --- 613,618 ---- else if (name.equalsIgnoreCase ("SCRIPT")) mIsScript = false; + if (end.breaksFlow ()) + carriage_return (); } |
From: <so...@us...> - 2003-08-24 18:29:21
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv13437/src/org/htmlparser Modified Files: Parser.java Log Message: updated fit test Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** Parser.java 11 Aug 2003 00:18:28 -0000 1.53 --- Parser.java 24 Aug 2003 18:29:18 -0000 1.54 *************** *** 899,913 **** public NodeIterator elements() throws ParserException { ! boolean remove_scanner; ! Node node; ! MetaTag meta; ! String httpEquiv; ! String charset; ! boolean restart; ! EndTag end; IteratorImpl ret; - remove_scanner = false; - restart = false; ret = new IteratorImpl (reader, resourceLocn, feedback); ret = createIteratorImpl(remove_scanner, ret); --- 899,905 ---- public NodeIterator elements() throws ParserException { ! boolean remove_scanner = false; IteratorImpl ret; ret = new IteratorImpl (reader, resourceLocn, feedback); ret = createIteratorImpl(remove_scanner, ret); |
From: <so...@us...> - 2003-08-24 18:28:05
|
Update of /cvsroot/htmlparser/htmlparser/src/fit In directory sc8-pr-cvs1:/tmp/cvs-serv13192/src/fit Modified Files: Attributes.java Log Message: updated fit test Index: Attributes.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/fit/Attributes.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Attributes.java 17 Apr 2003 03:07:43 -0000 1.1 --- Attributes.java 24 Aug 2003 18:28:02 -0000 1.2 *************** *** 5,9 **** import org.htmlparser.parserHelper.*; import org.htmlparser.tags.*; - import org.htmlparser.tags.data.*; public class Attributes extends ColumnFixture { --- 5,8 ---- *************** *** 28,33 **** public void execute() throws Exception { ! Tag tag = new Tag(new TagData(0, 0, 0, 0, tagContents, null, null, false)); ! table = attParser.parseAttributes(tag); } --- 27,31 ---- public void execute() throws Exception { ! table = attParser.parseAttributes(tagContents); } |
From: <der...@us...> - 2003-08-23 21:30:23
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests In directory sc8-pr-cvs1:/tmp/cvs-serv20167/tests/visitorsTests Modified Files: AllTests.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/AllTests.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** AllTests.java 11 Aug 2003 00:18:34 -0000 1.30 --- AllTests.java 23 Aug 2003 17:14:46 -0000 1.31 *************** *** 51,53 **** --- 51,129 ---- return suite; } + + /** + * Mainline for all suites of tests. + * @param args Command line arguments. The following options + * are understood: + * <pre> + * -text -- use junit.textui.TestRunner + * -awt -- use junit.awtui.TestRunner + * -swing -- use junit.swingui.TestRunner (default) + * </pre> + * All other options are passed on to the junit framework. 
+ */ + public static void main(String[] args) + { + String runner; + int i; + String arguments[]; + Class cls; + + runner = null; + for (i = 0; (i < args.length) && (null == runner); i++) + { + if (args[i].equalsIgnoreCase ("-text")) + runner = "junit.textui.TestRunner"; + else if (args[i].equalsIgnoreCase ("-awt")) + runner = "junit.awtui.TestRunner"; + else if (args[i].equalsIgnoreCase ("-swing")) + runner = "junit.swingui.TestRunner"; + } + if (null != runner) + { + // remove it from the arguments + arguments = new String[args.length - 1]; + System.arraycopy (args, 0, arguments, 0, i - 1); + System.arraycopy (args, i, arguments, i - 1, args.length - i); + args = arguments; + } + else + runner = "junit.swingui.TestRunner"; + + /* + * from http://www.mail-archive.com/commons-user%40jakarta.apache.org/msg02958.html + * + * The problem is within the UI test runners of JUnit. They bring + * with them a custom classloader, which causes the + * LogConfigurationException. Unfortunately Log4j doesn't work + * either. + * + * Solution: Disable "Reload classes every run" or start JUnit with + * command line option -noloading before the name of the Testsuite. + */ + + // append the test class + arguments = new String[args.length + 2]; + System.arraycopy (args, 0, arguments, 0, args.length); + arguments[arguments.length - 2] = "-noloading"; + arguments[arguments.length - 1] = "org.htmlparser.tests.visitorsTests.AllTests"; + + // invoke main() of the test runner + try + { + cls = Class.forName (runner); + java.lang.reflect.Method method = cls.getDeclaredMethod ( + "main", new Class[] { String[].class }); + method.invoke ( + null, + new Object[] { arguments }); + } + catch (Throwable t) + { + System.err.println ( + "cannot run unit test (" + + t.getMessage () + + ")"); + } + } } |
From: <der...@us...> - 2003-08-23 21:30:22
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1:/tmp/cvs-serv20167/tests/utilTests Modified Files: NodeListTest.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: NodeListTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/NodeListTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** NodeListTest.java 11 Aug 2003 00:18:34 -0000 1.11 --- NodeListTest.java 23 Aug 2003 17:14:46 -0000 1.12 *************** *** 118,122 **** private Node createHTMLNodeObject() { Node node = new AbstractNode(10,20) { ! public void accept(NodeVisitor visitor) { } --- 118,122 ---- private Node createHTMLNodeObject() { Node node = new AbstractNode(10,20) { ! public void accept(Object visitor) { } |
From: <der...@us...> - 2003-08-23 20:43:37
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1:/tmp/cvs-serv20167/nodeDecorators Modified Files: AbstractNodeDecorator.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** AbstractNodeDecorator.java 11 Aug 2003 00:18:28 -0000 1.4 --- AbstractNodeDecorator.java 23 Aug 2003 17:14:45 -0000 1.5 *************** *** 44,48 **** } ! public void accept(NodeVisitor visitor) { delegate.accept(visitor); } --- 44,48 ---- } ! public void accept(Object visitor) { delegate.accept(visitor); } *************** *** 68,72 **** } ! public CompositeTag getParent() { return delegate.getParent(); } --- 68,72 ---- } ! public Node getParent() { return delegate.getParent(); } *************** *** 76,82 **** } ! public void setParent(CompositeTag tag) { ! delegate.setParent(tag); } public void setText(String text) { --- 76,100 ---- } ! public void setParent(Node node) { ! 
delegate.setParent(node); } + + /** + * Get the children of this node. + * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. + */ + public NodeList getChildren () + { + return (delegate.getChildren ()); + } + + /** + * Set the children of this node. + * @param children The new list of children this node contains. + */ + public void setChildren (NodeList children) + { + delegate.setChildren (children); + } public void setText(String text) { |
From: <der...@us...> - 2003-08-23 20:43:35
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv20167/tests/tagTests Modified Files: CompositeTagTest.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: CompositeTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/CompositeTagTest.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** CompositeTagTest.java 11 Aug 2003 00:18:33 -0000 1.3 --- CompositeTagTest.java 23 Aug 2003 17:14:46 -0000 1.4 *************** *** 61,65 **** assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); ! CompositeTag parent = stringNode[0].getParent(); assertType("should be column",TableColumn.class,parent); parent = parent.getParent(); --- 61,65 ---- assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); ! 
Node parent = stringNode[0].getParent(); assertType("should be column",TableColumn.class,parent); parent = parent.getParent(); *************** *** 93,97 **** assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); ! CompositeTag parent = stringNode[0].getParent(); int pos = parent.findPositionOf(stringNode[0]); assertEquals("position",5,pos); --- 93,97 ---- assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); ! CompositeTag parent = (CompositeTag)stringNode[0].getParent(); int pos = parent.findPositionOf(stringNode[0]); assertEquals("position",5,pos); |
From: <der...@us...> - 2003-08-23 19:15:25
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv20167/parserHelper Modified Files: SpecialHashtable.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: SpecialHashtable.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/SpecialHashtable.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** SpecialHashtable.java 11 Aug 2003 00:18:29 -0000 1.2 --- SpecialHashtable.java 23 Aug 2003 17:14:45 -0000 1.3 *************** *** 30,34 **** import java.util.Hashtable; - import org.htmlparser.tags.Tag; /** --- 30,33 ---- *************** *** 75,81 **** ret = getRaw (key); ! if (Tag.NULLVALUE == ret) ret = null; ! else if (Tag.NOTHING == ret) ret = ""; --- 74,80 ---- ret = getRaw (key); ! if ("$<NULL>$" == ret) ret = null; ! else if ("$<NOTHING>$" == ret) ret = ""; |
From: <der...@us...> - 2003-08-23 19:15:23
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv20167/tags Modified Files: CompositeTag.java LinkTag.java SelectTag.java Tag.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.48 retrieving revision 1.49 diff -C2 -d -r1.48 -r1.49 *** CompositeTag.java 11 Aug 2003 00:18:30 -0000 1.48 --- CompositeTag.java 23 Aug 2003 17:14:45 -0000 1.49 *************** *** 39,65 **** public abstract class CompositeTag extends Tag { protected Tag startTag, endTag; - protected NodeList childTags; public CompositeTag(TagData tagData, CompositeTagData compositeTagData) { super(tagData); - this.childTags = compositeTagData.getChildren(); this.startTag = compositeTagData.getStartTag(); this.endTag = compositeTagData.getEndTag(); } ! ! public SimpleNodeIterator children() { ! return childTags.elements(); } ! public Node getChild(int index) { ! return childTags.elementAt(index); } ! ! public Node [] getChildrenAsNodeArray() { ! return childTags.toNodeArray(); } ! 
public NodeList getChildren() { ! return childTags; } --- 39,85 ---- public abstract class CompositeTag extends Tag { protected Tag startTag, endTag; public CompositeTag(TagData tagData, CompositeTagData compositeTagData) { super(tagData); this.startTag = compositeTagData.getStartTag(); this.endTag = compositeTagData.getEndTag(); + setChildren (compositeTagData.getChildren()); } ! ! /** ! * Get an iterator over the children of this node. ! * @return Am iterator over the children of this node. ! */ ! public SimpleNodeIterator children () ! { ! return (getChildren ().elements ()); } ! /** ! * Get the child of this node at the given position. ! * @param index The in the node list of the child. ! * @return The child at that index. ! */ ! public Node getChild (int index) ! { ! return (getChildren ().elementAt (index)); } ! ! /** ! * Get the children as an array of <code>Node</code> objects. ! * @return The children in an array. ! */ ! public Node [] getChildrenAsNodeArray () ! { ! return (getChildren ().toNodeArray ()); } ! /** ! * Remove the child at the position given. ! * @param i The index of the child to remove. ! */ ! public void removeChild (int i) ! { ! getChildren ().remove (i); } *************** *** 178,183 **** * @return NodeList */ ! public NodeList searchFor(Class classType) { ! return childTags.searchFor(classType); } /** --- 198,204 ---- * @return NodeList */ ! public NodeList searchFor(Class classType) ! { ! return (getChildren ().searchFor (classType)); } /** *************** *** 247,251 **** */ public Node childAt(int index) { ! return childTags.elementAt(index); } --- 268,272 ---- */ public Node childAt(int index) { ! return (getChildren ().elementAt (index)); } *************** *** 290,294 **** public int getChildCount() { ! return childTags.size(); } --- 311,315 ---- public int getChildCount() { ! 
return (getChildren ().size ()); } Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** LinkTag.java 15 Aug 2003 20:51:48 -0000 1.29 --- LinkTag.java 23 Aug 2003 17:14:45 -0000 1.30 *************** *** 247,254 **** super.accept(visitor); } - - public void removeChild(int i) { - childTags.remove(i); - } - } --- 247,249 ---- Index: SelectTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/SelectTag.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** SelectTag.java 11 Aug 2003 00:18:30 -0000 1.22 --- SelectTag.java 23 Aug 2003 17:14:45 -0000 1.23 *************** *** 62,69 **** public String toString() { ! StringBuffer lString = new StringBuffer(ParserUtils.toString(this)); ! for(int i=0;i<childTags.size(); i++) { ! OptionTag optionTag = (OptionTag)childTags.elementAt(i); lString.append(optionTag.toString()).append("\n"); } --- 62,73 ---- public String toString() { ! StringBuffer lString; ! NodeList children; ! ! lString = new StringBuffer(ParserUtils.toString(this)); ! children = getChildren (); ! for(int i=0;i<children.size(); i++) { ! OptionTag optionTag = (OptionTag)children.elementAt(i); lString.append(optionTag.toString()).append("\n"); } Index: Tag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Tag.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** Tag.java 15 Aug 2003 20:51:48 -0000 1.40 --- Tag.java 23 Aug 2003 17:14:45 -0000 1.41 *************** *** 579,582 **** --- 579,592 ---- } + /** + * Handle a visitor. 
+ * <em>NOTE: This currently defers to accept(NodeVisitor), but eventually + * subclasses of Node should be overriding accept(Object) directly.</em> + * @param visitor The <code>NodeVisitor</code> object. + */ + public void accept(Object visitor) { + accept ((NodeVisitor)visitor); + } + public void accept(NodeVisitor visitor) { visitor.visitTag(this); |
From: <der...@us...> - 2003-08-23 19:15:21
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv20167/tests/scannersTests Modified Files: CompositeTagScannerTest.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** CompositeTagScannerTest.java 11 Aug 2003 00:18:32 -0000 1.32 --- CompositeTagScannerTest.java 23 Aug 2003 17:14:45 -0000 1.33 *************** *** 508,512 **** Node firstChild = customTag.childAt(0); assertType("firstChild",StringNode.class,firstChild); ! CompositeTag parent = firstChild.getParent(); assertNotNull("first child parent should not be null",parent); assertSame("parent and custom tag should be the same",customTag,parent); --- 508,512 ---- Node firstChild = customTag.childAt(0); assertType("firstChild",StringNode.class,firstChild); ! 
Node parent = firstChild.getParent(); assertNotNull("first child parent should not be null",parent); assertSame("parent and custom tag should be the same",customTag,parent); |
From: <der...@us...> - 2003-08-23 19:15:11
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1:/tmp/cvs-serv20167/util Modified Files: ChainedException.java NodeList.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: ChainedException.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ChainedException.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** ChainedException.java 11 Aug 2003 00:18:35 -0000 1.35 --- ChainedException.java 23 Aug 2003 17:14:46 -0000 1.36 *************** *** 65,70 **** import java.io.PrintStream; import java.io.PrintWriter; ! import java.util.ArrayList; ! import java.util.List; public class ChainedException --- 65,69 ---- import java.io.PrintStream; import java.io.PrintWriter; ! import java.util.Vector; public class ChainedException *************** *** 93,109 **** public String[] getMessageChain() { ! List list = getMessageList(); String[] chain = new String[list.size()]; ! for (int i = 0; i < list.size(); i++) ! { ! chain[i] = (String)list.get(i); ! } return chain; } ! public List getMessageList() { ! ArrayList list = new ArrayList(); ! 
list.add(getMessage()); if (throwable != null) { --- 92,105 ---- public String[] getMessageChain() { ! Vector list = getMessageList(); String[] chain = new String[list.size()]; ! list.copyInto (chain); return chain; } ! public Vector getMessageList() { ! Vector list = new Vector(); ! list.addElement(getMessage()); if (throwable != null) { *************** *** 111,115 **** { ChainedException chain = (ChainedException)throwable; ! list.addAll(chain.getMessageList()); } else --- 107,113 ---- { ChainedException chain = (ChainedException)throwable; ! Vector sublist = chain.getMessageList (); ! for (int i = 0; i < sublist.size (); i++) ! list.addElement (sublist.elementAt (i)); } else *************** *** 118,122 **** if (message != null && !message.equals("")) { ! list.add(message); } } --- 116,120 ---- if (message != null && !message.equals("")) { ! list.addElement (message); } } Index: NodeList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** NodeList.java 11 Aug 2003 00:18:36 -0000 1.38 --- NodeList.java 23 Aug 2003 17:14:46 -0000 1.39 *************** *** 33,37 **** import org.htmlparser.Node; - import org.htmlparser.tags.CompositeTag; public class NodeList implements Serializable { --- 33,36 ---- *************** *** 183,186 **** --- 182,186 ---- String name; Node node; + NodeList children; NodeList ret; *************** *** 192,197 **** if (node.getClass ().getName ().equals (name)) ret.add (node); ! if (recursive && node instanceof CompositeTag) ! ret.add (((CompositeTag)node).getChildren ().searchFor (classType, recursive)); } --- 192,201 ---- if (node.getClass ().getName ().equals (name)) ret.add (node); ! if (recursive) ! { ! children = node.getChildren (); ! if (null != children) ! ret.add (children.searchFor (classType, recursive)); ! } } |
From: <der...@us...> - 2003-08-23 17:50:00
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv20167/lexer/nodes Modified Files: RemarkNode.java StringNode.java TagNode.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RemarkNode.java 17 Aug 2003 16:09:28 -0000 1.1 --- RemarkNode.java 23 Aug 2003 17:14:45 -0000 1.2 *************** *** 33,37 **** import org.htmlparser.lexer.Page; import org.htmlparser.util.NodeList; - import org.htmlparser.visitors.NodeVisitor; /** --- 33,36 ---- *************** *** 86,92 **** } ! public void accept(NodeVisitor visitor) { ! // todo: fix this ! // visitor.visitRemarkNode(this); } } --- 85,89 ---- } ! 
public void accept(Object visitor) { } } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** StringNode.java 17 Aug 2003 16:09:28 -0000 1.1 --- StringNode.java 23 Aug 2003 17:14:45 -0000 1.2 *************** *** 34,38 **** import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; - import org.htmlparser.visitors.NodeVisitor; /** --- 34,37 ---- *************** *** 106,113 **** } ! public void accept (NodeVisitor visitor) { - // todo: fix this - // visitor.visitStringNode (this); } } --- 105,110 ---- } ! public void accept (Object visitor) { } } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** TagNode.java 21 Aug 2003 01:52:23 -0000 1.2 --- TagNode.java 23 Aug 2003 17:14:45 -0000 1.3 *************** *** 30,48 **** import java.util.Enumeration; - import java.util.HashSet; import java.util.Hashtable; - import java.util.Map; import java.util.Vector; - import org.htmlparser.lexer.Cursor; import org.htmlparser.lexer.Page; import org.htmlparser.parserHelper.SpecialHashtable; - import org.htmlparser.parserHelper.TagParser; - import org.htmlparser.scanners.TagScanner; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; ! import org.htmlparser.visitors.NodeVisitor; /** ! * Tag represents a generic tag. This class allows users to register specific * tag scanners, which can identify links, or image references. This tag asks the * scanners to run over the text, and identify. 
It can be used to dynamically --- 30,44 ---- import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; import org.htmlparser.lexer.Cursor; + import org.htmlparser.lexer.Page; import org.htmlparser.parserHelper.SpecialHashtable; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; ! /** ! * TagNode represents a generic tag. This class allows users to register specific * tag scanners, which can identify links, or image references. This tag asks the * scanners to run over the text, and identify. It can be used to dynamically *************** *** 63,67 **** private final static String EMPTY_STRING=""; - private static TagParser tagParser; private boolean emptyXmlTag = false; --- 59,62 ---- *************** *** 72,118 **** protected Vector mAttributes; - /** - * Scanner associated with this tag (useful for extraction of filtering data from a - * HTML node) - */ - protected TagScanner thisScanner = null; - /** * Set of tags that breaks the flow. */ ! protected static HashSet breakTags; static { ! breakTags = new HashSet (30); ! breakTags.add ("BLOCKQUOTE"); ! breakTags.add ("BODY"); ! breakTags.add ("BR"); ! breakTags.add ("CENTER"); ! breakTags.add ("DD"); ! breakTags.add ("DIR"); ! breakTags.add ("DIV"); ! breakTags.add ("DL"); ! breakTags.add ("DT"); ! breakTags.add ("FORM"); ! breakTags.add ("H1"); ! breakTags.add ("H2"); ! breakTags.add ("H3"); ! breakTags.add ("H4"); ! breakTags.add ("H5"); ! breakTags.add ("H6"); ! breakTags.add ("HEAD"); ! breakTags.add ("HR"); ! breakTags.add ("HTML"); ! breakTags.add ("ISINDEX"); ! breakTags.add ("LI"); ! breakTags.add ("MENU"); ! breakTags.add ("NOFRAMES"); ! breakTags.add ("OL"); ! breakTags.add ("P"); ! breakTags.add ("PRE"); ! breakTags.add ("TD"); ! breakTags.add ("TH"); ! breakTags.add ("TITLE"); ! breakTags.add ("UL"); } --- 67,107 ---- protected Vector mAttributes; /** * Set of tags that breaks the flow. */ ! protected static Hashtable breakTags; static { ! 
breakTags = new Hashtable (30); ! breakTags.put ("BLOCKQUOTE", Boolean.TRUE); ! breakTags.put ("BODY", Boolean.TRUE); ! breakTags.put ("BR", Boolean.TRUE); ! breakTags.put ("CENTER", Boolean.TRUE); ! breakTags.put ("DD", Boolean.TRUE); ! breakTags.put ("DIR", Boolean.TRUE); ! breakTags.put ("DIV", Boolean.TRUE); ! breakTags.put ("DL", Boolean.TRUE); ! breakTags.put ("DT", Boolean.TRUE); ! breakTags.put ("FORM", Boolean.TRUE); ! breakTags.put ("H1", Boolean.TRUE); ! breakTags.put ("H2", Boolean.TRUE); ! breakTags.put ("H3", Boolean.TRUE); ! breakTags.put ("H4", Boolean.TRUE); ! breakTags.put ("H5", Boolean.TRUE); ! breakTags.put ("H6", Boolean.TRUE); ! breakTags.put ("HEAD", Boolean.TRUE); ! breakTags.put ("HR", Boolean.TRUE); ! breakTags.put ("HTML", Boolean.TRUE); ! breakTags.put ("ISINDEX", Boolean.TRUE); ! breakTags.put ("LI", Boolean.TRUE); ! breakTags.put ("MENU", Boolean.TRUE); ! breakTags.put ("NOFRAMES", Boolean.TRUE); ! breakTags.put ("OL", Boolean.TRUE); ! breakTags.put ("P", Boolean.TRUE); ! breakTags.put ("PRE", Boolean.TRUE); ! breakTags.put ("TD", Boolean.TRUE); ! breakTags.put ("TH", Boolean.TRUE); ! breakTags.put ("TITLE", Boolean.TRUE); ! breakTags.put ("UL", Boolean.TRUE); } *************** *** 132,145 **** /** - * Locate the tag withing the input string, by parsing from the given position - * @param reader HTML reader to be provided so as to allow reading of next line - * @param input Input String - * @param position Position to start parsing from - */ - // public static Tag find(NodeReader reader,String input,int position) { - // return tagParser.find(reader,input,position); - // } - - /** * In case the tag is parsed at the scan method this will return value of a * parameter not implemented yet --- 121,124 ---- *************** *** 202,206 **** // special handling for the node name attribute = (Attribute)attributes.elementAt (0); ! 
ret.put (org.htmlparser.tags.Tag.TAGNAME, attribute.getName ().toUpperCase ()); // the rest for (int i = 1; i < attributes.size (); i++) --- 181,185 ---- // special handling for the node name attribute = (Attribute)attributes.elementAt (0); ! ret.put (TAGNAME, attribute.getName ().toUpperCase ()); // the rest for (int i = 1; i < attributes.size (); i++) *************** *** 235,239 **** } else ! ret.put (org.htmlparser.tags.Tag.TAGNAME, ""); return (ret); --- 214,218 ---- } else ! ret.put (TAGNAME, ""); return (ret); *************** *** 253,336 **** /** - * Return the scanner associated with this tag. - */ - public TagScanner getThisScanner() - { - return thisScanner; - } - - /** - * Extract the first word from the given string. - * Words are delimited by whitespace or equals signs. - * @param s The string to get the word from. - * @return The first word. - */ - // public static String extractWord (String s) - // { - // int length; - // boolean parse; - // char ch; - // StringBuffer ret; - // - // length = s.length (); - // ret = new StringBuffer (length); - // parse = true; - // for (int i = 0; i < length && parse; i++) - // { - // ch = s.charAt (i); - // if (Character.isWhitespace (ch) || ch == '=') - // parse = false; - // else - // ret.append (Character.toUpperCase (ch)); - // } - // - // return (ret.toString ()); - // } - - /** - * Scan the tag to see using the registered scanners, and attempt identification. - * @param url URL at which HTML page is located - * @param reader The NodeReader that is to be used for reading the url - */ - // public AbstractNode scan(Map scanners,String url,NodeReader reader) throws ParserException - // { - // if (tagContents.length()==0) return this; - // try { - // boolean found=false; - // AbstractNode retVal=null; - // // Find the first word in the scanners - // String firstWord = extractWord(tagContents.toString()); - // // Now, get the scanner associated with this. 
- // TagScanner scanner = (TagScanner)scanners.get(firstWord); - // - // // Now do a deep check - // if (scanner != null && - // scanner.evaluate( - // tagContents.toString(), - // reader.getPreviousOpenScanner() - // ) - // ) - // { - // found=true; - // TagScanner save; - // save = reader.getPreviousOpenScanner (); - // reader.setPreviousOpenScanner(scanner); - // retVal=scanner.createScannedNode(this,url,reader,tagLine); - // reader.setPreviousOpenScanner(save); - // } - // - // if (!found) return this; - // else { - // return retVal; - // } - // } - // catch (Exception e) { - // String errorMsg; - // if (tagContents!=null) errorMsg = tagContents.toString(); else errorMsg="null"; - // throw new ParserException("Tag.scan() : Error while scanning tag, tag contents = "+errorMsg+", tagLine = "+tagLine,e); - // } - // } - - /** * Sets the attributes. * @param attributes The attribute collection to set. --- 232,235 ---- *************** *** 423,439 **** } } - public void setThisScanner(TagScanner scanner) - { - thisScanner = scanner; - } ! public String toPlainTextString() { return EMPTY_STRING; } /** ! * A call to a tag's toHTML() method will render it in HTML ! * Most tags that do not have children and inherit from Tag, ! * do not need to override toHTML(). * @see org.htmlparser.Node#toHtml() */ --- 322,332 ---- } } ! public String toPlainTextString() { return EMPTY_STRING; } /** ! * A call to a tag's toHTML() method will render it in HTML. * @see org.htmlparser.Node#toHtml() */ *************** *** 487,498 **** /** - * Sets the tagParser. - * @param tagParser The tagParser to set - */ - public static void setTagParser(TagParser tagParser) { - //todo: fix this Tag.tagParser = tagParser; - } - - /** * Determines if the given tag breaks the flow of text. * @return <code>true</code> if following text would start on a new line, --- 380,383 ---- *************** *** 501,505 **** public boolean breaksFlow () { ! 
return (breakTags.contains (getText ().toUpperCase ())); } --- 386,390 ---- public boolean breaksFlow () { ! return (breakTags.containsKey (getText ().toUpperCase ())); } *************** *** 511,517 **** * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) { ! if (thisScanner!=null && thisScanner.getFilter()==filter) ! collectionList.add(this); } --- 396,401 ---- * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) ! { } *************** *** 541,546 **** } ! public void accept(NodeVisitor visitor) { ! // todo: fix this visitor.visitTag(this); } --- 425,429 ---- } ! public void accept(Object visitor) { } |
From: <der...@us...> - 2003-08-23 17:49:53
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv20167/lexer Modified Files: Lexer.java Page.java Log Message: Sixth drop for new i/o subsystem. Isolated htmllexer.jar file and made it compileable and runnable on JDK 1.1 systems. The build.xml file now has four new targets for separate compiling and jaring of the lexer and parser. Significantly refactored the existing Node interface and AbstractNode class to achieve isolation. They now support get/setChildren(), rather than CompositeTag. Various scanners that were directly accessing the childTags node list were affected. The get/setParent is now a generic Node rather than a CompositeTag. The visitor accept() signature was changed to Object to avoid dragging in visitors code. This was *not* changed on classes derived from Tag, although it could be. ChainedException now uses/returns a Vector. Removed the cruft from lexer nodes where possible. Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Lexer.java 23 Aug 2003 01:33:06 -0000 1.3 --- Lexer.java 23 Aug 2003 17:14:44 -0000 1.4 *************** *** 107,111 **** /** * Get the next node from the source. ! * @return A RemarkNode, StringNode or Tag, or <code>null</code> if no * more lexemes are present. * @exception ParserException If there is a problem with the underlying page. --- 107,111 ---- /** * Get the next node from the source. ! * @return A RemarkNode, StringNode or TagNode, or <code>null</code> if no * more lexemes are present. * @exception ParserException If there is a problem with the underlying page. 
*************** *** 575,578 **** --- 575,580 ---- { System.out.println (pe.getMessage ()); + if (null != pe.getThrowable ()) + System.out.println (pe.getThrowable ().getMessage ()); } } Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** Page.java 23 Aug 2003 01:33:06 -0000 1.7 --- Page.java 23 Aug 2003 17:14:45 -0000 1.8 *************** *** 110,115 **** catch (UnknownHostException uhe) { ! Random number = new Random (); ! int message = number.nextInt (mFourOhFour.length); throw new ParserException (mFourOhFour[message], uhe); } --- 110,114 ---- catch (UnknownHostException uhe) { ! int message = (int)(Math.random () * mFourOhFour.length); throw new ParserException (mFourOhFour[message], uhe); } |