htmlparser-cvs Mailing List for HTML Parser (Page 42)
Brought to you by:
derrickoswald
You can subscribe to this list here.
2003 | Jan | Feb | Mar | Apr | May (141) | Jun (108) | Jul (66) | Aug (127) | Sep (155) | Oct (149) | Nov (72) | Dec (72) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2004 | Jan (100) | Feb (36) | Mar (21) | Apr (3) | May (87) | Jun (28) | Jul (84) | Aug (5) | Sep (14) | Oct | Nov | Dec |
2005 | Jan (1) | Feb (39) | Mar (26) | Apr (38) | May (14) | Jun (10) | Jul | Aug | Sep (13) | Oct (8) | Nov (10) | Dec |
2006 | Jan | Feb (1) | Mar (17) | Apr (20) | May (28) | Jun (24) | Jul | Aug | Sep | Oct | Nov | Dec |
2015 | Jan | Feb | Mar (1) | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/tags Modified Files: AppletTag.java BaseHrefTag.java BodyTag.java Bullet.java BulletList.java CompositeTag.java Div.java DoctypeTag.java EndTag.java FormTag.java FrameSetTag.java FrameTag.java HeadTag.java Html.java ImageTag.java InputTag.java JspTag.java LabelTag.java LinkTag.java MetaTag.java OptionTag.java ScriptTag.java SelectTag.java Span.java StyleTag.java TableColumn.java TableRow.java TableTag.java Tag.java TextareaTag.java TitleTag.java package.html Log Message: Update version headers to 1.4-20030907 and update changelog. Index: AppletTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/AppletTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** AppletTag.java 1 Sep 2003 19:11:56 -0000 1.25 --- AppletTag.java 8 Sep 2003 02:26:29 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: BaseHrefTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BaseHrefTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** BaseHrefTag.java 3 Sep 2003 23:36:20 -0000 1.23 --- BaseHrefTag.java 8 Sep 2003 02:26:29 -0000 1.24 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: BodyTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BodyTag.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** BodyTag.java 3 Sep 2003 23:36:20 -0000 1.12 --- BodyTag.java 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Bullet.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Bullet.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** Bullet.java 3 Sep 2003 23:36:20 -0000 1.12 --- Bullet.java 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: BulletList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BulletList.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** BulletList.java 3 Sep 2003 23:36:20 -0000 1.12 --- BulletList.java 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.52 retrieving revision 1.53 diff -C2 -d -r1.52 -r1.53 *** CompositeTag.java 3 Sep 2003 23:36:20 -0000 1.52 --- CompositeTag.java 8 Sep 2003 02:26:29 -0000 1.53 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Div.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Div.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** Div.java 3 Sep 2003 23:36:20 -0000 1.12 --- Div.java 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: DoctypeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/DoctypeTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** DoctypeTag.java 3 Sep 2003 23:36:20 -0000 1.25 --- DoctypeTag.java 8 Sep 2003 02:26:29 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: EndTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/EndTag.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** EndTag.java 3 Sep 2003 23:36:20 -0000 1.28 --- EndTag.java 8 Sep 2003 02:26:29 -0000 1.29 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** FormTag.java 3 Sep 2003 23:36:20 -0000 1.31 --- FormTag.java 8 Sep 2003 02:26:29 -0000 1.32 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: FrameSetTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameSetTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** FrameSetTag.java 3 Sep 2003 23:36:20 -0000 1.23 --- FrameSetTag.java 8 Sep 2003 02:26:29 -0000 1.24 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: FrameTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** FrameTag.java 3 Sep 2003 23:36:20 -0000 1.23 --- FrameTag.java 8 Sep 2003 02:26:29 -0000 1.24 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: HeadTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/HeadTag.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** HeadTag.java 3 Sep 2003 23:36:20 -0000 1.12 --- HeadTag.java 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Html.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Html.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** Html.java 3 Sep 2003 23:36:20 -0000 1.24 --- Html.java 8 Sep 2003 02:26:29 -0000 1.25 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** ImageTag.java 3 Sep 2003 23:36:20 -0000 1.24 --- ImageTag.java 8 Sep 2003 02:26:29 -0000 1.25 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: InputTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/InputTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** InputTag.java 3 Sep 2003 23:36:20 -0000 1.24 --- InputTag.java 8 Sep 2003 02:26:29 -0000 1.25 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: JspTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/JspTag.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** JspTag.java 3 Sep 2003 23:36:20 -0000 1.26 --- JspTag.java 8 Sep 2003 02:26:29 -0000 1.27 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: LabelTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LabelTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** LabelTag.java 3 Sep 2003 23:36:20 -0000 1.25 --- LabelTag.java 8 Sep 2003 02:26:29 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** LinkTag.java 1 Sep 2003 21:28:33 -0000 1.32 --- LinkTag.java 8 Sep 2003 02:26:29 -0000 1.33 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: MetaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/MetaTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** MetaTag.java 3 Sep 2003 23:36:20 -0000 1.24 --- MetaTag.java 8 Sep 2003 02:26:29 -0000 1.25 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: OptionTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/OptionTag.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** OptionTag.java 3 Sep 2003 23:36:20 -0000 1.27 --- OptionTag.java 8 Sep 2003 02:26:29 -0000 1.28 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: ScriptTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ScriptTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** ScriptTag.java 3 Sep 2003 23:36:20 -0000 1.25 --- ScriptTag.java 8 Sep 2003 02:26:29 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: SelectTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/SelectTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** SelectTag.java 3 Sep 2003 23:36:20 -0000 1.25 --- SelectTag.java 8 Sep 2003 02:26:29 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Span.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Span.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** Span.java 3 Sep 2003 23:36:20 -0000 1.26 --- Span.java 8 Sep 2003 02:26:29 -0000 1.27 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StyleTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/StyleTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** StyleTag.java 3 Sep 2003 23:36:20 -0000 1.24 --- StyleTag.java 8 Sep 2003 02:26:29 -0000 1.25 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TableColumn.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableColumn.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** TableColumn.java 3 Sep 2003 23:36:20 -0000 1.26 --- TableColumn.java 8 Sep 2003 02:26:29 -0000 1.27 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TableRow.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableRow.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** TableRow.java 3 Sep 2003 23:36:20 -0000 1.28 --- TableRow.java 8 Sep 2003 02:26:29 -0000 1.29 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TableTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TableTag.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** TableTag.java 3 Sep 2003 23:36:20 -0000 1.29 --- TableTag.java 8 Sep 2003 02:26:29 -0000 1.30 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Tag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Tag.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** Tag.java 3 Sep 2003 23:36:20 -0000 1.45 --- Tag.java 8 Sep 2003 02:26:29 -0000 1.46 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TextareaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TextareaTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** TextareaTag.java 3 Sep 2003 23:36:20 -0000 1.23 --- TextareaTag.java 8 Sep 2003 02:26:29 -0000 1.24 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TitleTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/TitleTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** TitleTag.java 3 Sep 2003 23:36:20 -0000 1.23 --- TitleTag.java 8 Sep 2003 02:26:29 -0000 1.24 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/package.html,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** package.html 24 Aug 2003 21:59:42 -0000 1.12 --- package.html 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 4,10 **** <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030824 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha --- 4,10 ---- <!-- ! @(#)package.html 1.60 98/01/27 ! 
HTMLParser Library v1_4_20030907 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha |
From: <der...@us...> - 2003-09-08 02:27:31
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/codeMetrics In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/tests/codeMetrics Modified Files: LineCounter.java Log Message: Update version headers to 1.4-20030907 and update changelog. Index: LineCounter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/codeMetrics/LineCounter.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** LineCounter.java 3 Sep 2003 23:36:20 -0000 1.5 --- LineCounter.java 8 Sep 2003 02:26:30 -0000 1.6 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/tests/lexerTests Modified Files: AllTests.java LexerTests.java PageIndexTests.java PageTests.java SourceTests.java StreamTests.java Log Message: Update version headers to 1.4-20030907 and update changelog. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AllTests.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** AllTests.java 3 Sep 2003 23:36:21 -0000 1.8 --- AllTests.java 8 Sep 2003 02:26:30 -0000 1.9 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: LexerTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/LexerTests.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** LexerTests.java 3 Sep 2003 23:36:21 -0000 1.4 --- LexerTests.java 8 Sep 2003 02:26:30 -0000 1.5 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: PageIndexTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageIndexTests.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** PageIndexTests.java 3 Sep 2003 23:36:21 -0000 1.5 --- PageIndexTests.java 8 Sep 2003 02:26:30 -0000 1.6 *************** *** 1,3 **** ! 
// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: PageTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageTests.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** PageTests.java 3 Sep 2003 23:36:21 -0000 1.7 --- PageTests.java 8 Sep 2003 02:26:30 -0000 1.8 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: SourceTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** SourceTests.java 3 Sep 2003 23:36:21 -0000 1.6 --- SourceTests.java 8 Sep 2003 02:26:30 -0000 1.7 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StreamTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** StreamTests.java 3 Sep 2003 23:36:21 -0000 1.5 --- StreamTests.java 8 Sep 2003 02:26:30 -0000 1.6 *************** *** 1,3 **** ! 
// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
From: <der...@us...> - 2003-09-08 02:27:28
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/tags/data Modified Files: CompositeTagData.java FormData.java LinkData.java TagData.java Log Message: Update version headers to 1.4-20030907 and update changelog. Index: CompositeTagData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/CompositeTagData.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** CompositeTagData.java 3 Sep 2003 23:36:20 -0000 1.30 --- CompositeTagData.java 8 Sep 2003 02:26:30 -0000 1.31 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: FormData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/FormData.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** FormData.java 3 Sep 2003 23:36:20 -0000 1.25 --- FormData.java 8 Sep 2003 02:26:30 -0000 1.26 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: LinkData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/LinkData.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** LinkData.java 3 Sep 2003 23:36:20 -0000 1.27 --- LinkData.java 8 Sep 2003 02:26:30 -0000 1.28 *************** *** 1,3 **** ! 
// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TagData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/TagData.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** TagData.java 3 Sep 2003 23:36:20 -0000 1.28 --- TagData.java 8 Sep 2003 02:26:30 -0000 1.29 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/parserapplications Modified Files: LinkExtractor.java MailRipper.java Robot.java StringExtractor.java package.html Log Message: Update version headers to 1.4-20030907 and update changelog. Index: LinkExtractor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** LinkExtractor.java 1 Sep 2003 20:24:04 -0000 1.42 --- LinkExtractor.java 8 Sep 2003 02:26:29 -0000 1.43 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: MailRipper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/MailRipper.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** MailRipper.java 3 Sep 2003 23:36:19 -0000 1.43 --- MailRipper.java 8 Sep 2003 02:26:29 -0000 1.44 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Robot.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/Robot.java,v retrieving revision 1.45 retrieving revision 1.46 diff -C2 -d -r1.45 -r1.46 *** Robot.java 3 Sep 2003 23:36:19 -0000 1.45 --- Robot.java 8 Sep 2003 02:26:29 -0000 1.46 *************** *** 1,3 **** ! 
// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StringExtractor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/StringExtractor.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** StringExtractor.java 24 Aug 2003 21:59:42 -0000 1.39 --- StringExtractor.java 8 Sep 2003 02:26:29 -0000 1.40 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/package.html,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** package.html 24 Aug 2003 21:59:42 -0000 1.12 --- package.html 8 Sep 2003 02:26:29 -0000 1.13 *************** *** 3,9 **** <head> <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030824 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha --- 3,9 ---- <head> <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030907 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha |
From: <der...@us...> - 2003-09-08 02:27:24
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/parserHelper Modified Files: AttributeParser.java CompositeTagScannerHelper.java ParserHelper.java ScriptScannerHelper.java SpecialHashtable.java StringParser.java TagParser.java Log Message: Update version headers to 1.4-20030907 and update changelog. Index: AttributeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** AttributeParser.java 3 Sep 2003 23:36:19 -0000 1.37 --- AttributeParser.java 8 Sep 2003 02:26:29 -0000 1.38 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** CompositeTagScannerHelper.java 3 Sep 2003 23:36:19 -0000 1.44 --- CompositeTagScannerHelper.java 8 Sep 2003 02:26:29 -0000 1.45 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: ParserHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ParserHelper.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** ParserHelper.java 3 Sep 2003 23:36:19 -0000 1.14 --- ParserHelper.java 8 Sep 2003 02:26:29 -0000 1.15 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: ScriptScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ScriptScannerHelper.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** ScriptScannerHelper.java 3 Sep 2003 23:36:19 -0000 1.9 --- ScriptScannerHelper.java 8 Sep 2003 02:26:29 -0000 1.10 *************** *** 1,3 **** ! //HTMLParser Library v1_4_20030824 - A java-based parser for HTML //Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! //HTMLParser Library v1_4_20030907 - A java-based parser for HTML //Copyright (C) Dec 31, 2000 Somik Raha // Index: SpecialHashtable.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/SpecialHashtable.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** SpecialHashtable.java 24 Aug 2003 21:59:42 -0000 1.4 --- SpecialHashtable.java 8 Sep 2003 02:26:29 -0000 1.5 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StringParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** StringParser.java 3 Sep 2003 23:36:19 -0000 1.37 --- StringParser.java 8 Sep 2003 02:26:29 -0000 1.38 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TagParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/TagParser.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** TagParser.java 3 Sep 2003 23:36:19 -0000 1.40 --- TagParser.java 8 Sep 2003 02:26:29 -0000 1.41 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/lexer/nodes Modified Files: AbstractNode.java Attribute.java RemarkNode.java StringNode.java TagNode.java package.html Log Message: Update version headers to 1.4-20030907 and update changelog. Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/AbstractNode.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** AbstractNode.java 3 Sep 2003 23:36:18 -0000 1.3 --- AbstractNode.java 8 Sep 2003 02:26:28 -0000 1.4 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Attribute.java 7 Sep 2003 21:28:03 -0000 1.5 --- Attribute.java 8 Sep 2003 02:26:28 -0000 1.6 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** RemarkNode.java 3 Sep 2003 23:36:18 -0000 1.4 --- RemarkNode.java 8 Sep 2003 02:26:28 -0000 1.5 *************** *** 1,3 **** ! 
// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** StringNode.java 3 Sep 2003 23:36:19 -0000 1.4 --- StringNode.java 8 Sep 2003 02:26:28 -0000 1.5 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** TagNode.java 7 Sep 2003 21:28:03 -0000 1.7 --- TagNode.java 8 Sep 2003 02:26:28 -0000 1.8 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/package.html,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** package.html 24 Aug 2003 21:59:41 -0000 1.2 --- package.html 8 Sep 2003 02:26:28 -0000 1.3 *************** *** 5,11 **** <!-- ! @(#)package.html 1.60 98/01/27 ! 
HTMLParser Library v1_4_20030824 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha --- 5,11 ---- <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030907 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/nodeDecorators Modified Files: AbstractNodeDecorator.java DecodingNode.java EscapeCharacterRemovingNode.java NonBreakingSpaceConvertingNode.java Log Message: Update version headers to 1.4-20030907 and update changelog. Index: AbstractNodeDecorator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/AbstractNodeDecorator.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** AbstractNodeDecorator.java 3 Sep 2003 23:36:19 -0000 1.8 --- AbstractNodeDecorator.java 8 Sep 2003 02:26:28 -0000 1.9 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: DecodingNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/DecodingNode.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** DecodingNode.java 3 Sep 2003 23:36:19 -0000 1.8 --- DecodingNode.java 8 Sep 2003 02:26:28 -0000 1.9 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: EscapeCharacterRemovingNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/EscapeCharacterRemovingNode.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** EscapeCharacterRemovingNode.java 3 Sep 2003 23:36:19 -0000 1.6 --- EscapeCharacterRemovingNode.java 8 Sep 2003 02:26:28 -0000 1.7 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: NonBreakingSpaceConvertingNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/nodeDecorators/NonBreakingSpaceConvertingNode.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** NonBreakingSpaceConvertingNode.java 3 Sep 2003 23:36:19 -0000 1.6 --- NonBreakingSpaceConvertingNode.java 8 Sep 2003 02:26:28 -0000 1.7 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/beans Modified Files: BeanyBaby.java HTMLLinkBean.java HTMLTextBean.java LinkBean.java StringBean.java package.html Log Message: Update version headers to 1.4-20030907 and update changelog. Index: BeanyBaby.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/BeanyBaby.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BeanyBaby.java 24 Aug 2003 21:59:41 -0000 1.13 --- BeanyBaby.java 8 Sep 2003 02:26:28 -0000 1.14 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: HTMLLinkBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/HTMLLinkBean.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** HTMLLinkBean.java 24 Aug 2003 21:59:41 -0000 1.14 --- HTMLLinkBean.java 8 Sep 2003 02:26:28 -0000 1.15 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: HTMLTextBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/HTMLTextBean.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** HTMLTextBean.java 24 Aug 2003 21:59:41 -0000 1.15 --- HTMLTextBean.java 8 Sep 2003 02:26:28 -0000 1.16 *************** *** 1,3 **** ! 
/// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! /// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: LinkBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** LinkBean.java 24 Aug 2003 21:59:41 -0000 1.18 --- LinkBean.java 8 Sep 2003 02:26:28 -0000 1.19 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StringBean.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/StringBean.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** StringBean.java 24 Aug 2003 21:59:41 -0000 1.24 --- StringBean.java 8 Sep 2003 02:26:28 -0000 1.25 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/package.html,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** package.html 24 Aug 2003 21:59:41 -0000 1.12 --- package.html 8 Sep 2003 02:26:28 -0000 1.13 *************** *** 4,10 **** <!-- ! @(#)package.html 1.60 98/01/27 ! 
HTMLParser Library v1_4_20030824 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha --- 4,10 ---- <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030907 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser/lexer Modified Files: Cursor.java Lexer.java Page.java PageIndex.java Source.java Stream.java package.html Log Message: Update version headers to 1.4-20030907 and update changelog. Index: Cursor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Cursor.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Cursor.java 24 Aug 2003 21:59:41 -0000 1.5 --- Cursor.java 8 Sep 2003 02:26:28 -0000 1.6 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Lexer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Lexer.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** Lexer.java 24 Aug 2003 21:59:41 -0000 1.6 --- Lexer.java 8 Sep 2003 02:26:28 -0000 1.7 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Page.java 3 Sep 2003 23:36:18 -0000 1.11 --- Page.java 8 Sep 2003 02:26:28 -0000 1.12 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: PageIndex.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/PageIndex.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** PageIndex.java 3 Sep 2003 23:36:18 -0000 1.6 --- PageIndex.java 8 Sep 2003 02:26:28 -0000 1.7 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Source.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** Source.java 24 Aug 2003 21:59:41 -0000 1.6 --- Source.java 8 Sep 2003 02:26:28 -0000 1.7 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Stream.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Stream.java 24 Aug 2003 21:59:41 -0000 1.4 --- Stream.java 8 Sep 2003 02:26:28 -0000 1.5 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/package.html,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** package.html 24 Aug 2003 21:59:41 -0000 1.4 --- package.html 8 Sep 2003 02:26:28 -0000 1.5 *************** *** 5,11 **** <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030824 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha --- 5,11 ---- <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030907 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha |
From: <der...@us...> - 2003-09-08 02:27:17
|
Update of /cvsroot/htmlparser/htmlparser/src/fit In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/fit Modified Files: Attributes.java Log Message: Update version headers to 1.4-20030907 and update changelog. Index: Attributes.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/fit/Attributes.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** Attributes.java 24 Aug 2003 18:28:02 -0000 1.2 --- Attributes.java 8 Sep 2003 02:26:28 -0000 1.3 *************** *** 7,37 **** public class Attributes extends ColumnFixture { ! private AttributeParser attParser = new AttributeParser(); ! public String key; ! private Map table; ! public String tagContents; ! public String value() { ! return (String) table.get(key); ! } ! public String name() { ! return (String) table.get(Tag.TAGNAME); ! } ! public int attributeCount() { ! return table.size() - 1; ! } ! public void execute() throws Exception { ! table = attParser.parseAttributes(tagContents); ! } ! public void wrong (Parse cell, String actual) { ! actual = escape(actual); ! wrong(cell); ! cell.addToBody(label("expected") + "<hr>" + actual.replaceAll("\n","<BR>") + label("actual")); ! } } --- 7,37 ---- public class Attributes extends ColumnFixture { ! private AttributeParser attParser = new AttributeParser(); ! public String key; ! private Map table; ! public String tagContents; ! public String value() { ! return (String) table.get(key); ! } ! public String name() { ! return (String) table.get(Tag.TAGNAME); ! } ! public int attributeCount() { ! return table.size() - 1; ! } ! public void execute() throws Exception { ! table = attParser.parseAttributes(tagContents); ! } ! public void wrong (Parse cell, String actual) { ! actual = escape(actual); ! wrong(cell); ! cell.addToBody(label("expected") + "<hr>" + actual.replaceAll("\n","<BR>") + label("actual")); ! } } |
From: <der...@us...> - 2003-09-08 02:27:17
|
Update of /cvsroot/htmlparser/htmlparser/docs In directory sc8-pr-cvs1:/tmp/cvs-serv7654/docs Modified Files: changes.txt release.txt Log Message: Update version headers to 1.4-20030907 and update changelog. Index: changes.txt =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/changes.txt,v retrieving revision 1.188 retrieving revision 1.189 diff -C2 -d -r1.188 -r1.189 *** changes.txt 24 Aug 2003 21:59:41 -0000 1.188 --- changes.txt 8 Sep 2003 02:26:27 -0000 1.189 *************** *** 13,16 **** --- 13,260 ---- ******************************************************************************* + Integration Build 1.4 - 20030907 + -------------------------------- + + 2003-09-07 17:28 derrickoswald + + * src/org/htmlparser/lexer/nodes/: Attribute.java, TagNode.java: + + Fix setAttribute and optimize getAttribute for speed. + + 2003-09-03 19:36 derrickoswald + + * src/org/htmlparser/: AbstractNode.java, Node.java, + NodeReader.java, RemarkNode.java, StringNode.java, + StringNodeFactory.java, lexer/Page.java, lexer/PageIndex.java, + lexer/nodes/AbstractNode.java, lexer/nodes/RemarkNode.java, + lexer/nodes/StringNode.java, lexer/nodes/TagNode.java, + nodeDecorators/AbstractNodeDecorator.java, + nodeDecorators/DecodingNode.java, + nodeDecorators/EscapeCharacterRemovingNode.java, + nodeDecorators/NonBreakingSpaceConvertingNode.java, + parserHelper/AttributeParser.java, + parserHelper/CompositeTagScannerHelper.java, + parserHelper/ParserHelper.java, + parserHelper/ScriptScannerHelper.java, + parserHelper/StringParser.java, parserHelper/TagParser.java, + parserapplications/MailRipper.java, parserapplications/Robot.java, + scanners/AppletScanner.java, scanners/BaseHrefScanner.java, + scanners/BodyScanner.java, scanners/BulletListScanner.java, + scanners/BulletScanner.java, scanners/CompositeTagScanner.java, + scanners/DivScanner.java, scanners/DoctypeScanner.java, + scanners/FormScanner.java, 
scanners/FrameScanner.java, + scanners/FrameSetScanner.java, scanners/HeadScanner.java, + scanners/HtmlScanner.java, scanners/ImageScanner.java, + scanners/InputTagScanner.java, scanners/JspScanner.java, + scanners/LabelScanner.java, scanners/LinkScanner.java, + scanners/MetaTagScanner.java, scanners/OptionTagScanner.java, + scanners/ScriptScanner.java, scanners/SelectTagScanner.java, + scanners/SpanScanner.java, scanners/StyleScanner.java, + scanners/TableColumnScanner.java, scanners/TableRowScanner.java, + scanners/TableScanner.java, scanners/TagScanner.java, + scanners/TextareaTagScanner.java, scanners/TitleScanner.java, + tags/BaseHrefTag.java, tags/BodyTag.java, tags/Bullet.java, + tags/BulletList.java, tags/CompositeTag.java, tags/Div.java, + tags/DoctypeTag.java, tags/EndTag.java, tags/FormTag.java, + tags/FrameSetTag.java, tags/FrameTag.java, tags/HeadTag.java, + tags/Html.java, tags/ImageTag.java, tags/InputTag.java, + tags/JspTag.java, tags/LabelTag.java, tags/MetaTag.java, + tags/OptionTag.java, tags/ScriptTag.java, tags/SelectTag.java, + tags/Span.java, tags/StyleTag.java, tags/TableColumn.java, + tags/TableRow.java, tags/TableTag.java, tags/Tag.java, + tags/TextareaTag.java, tags/TitleTag.java, + tags/data/CompositeTagData.java, tags/data/FormData.java, + tags/data/LinkData.java, tags/data/TagData.java, + tests/AllTests.java, tests/AssertXmlEqualsTest.java, + tests/BadTagIdentifier.java, tests/FunctionalTests.java, + tests/InstanceofPerformanceTest.java, + tests/LineNumberAssignedByNodeReaderTest.java, + tests/ParserTest.java, tests/ParserTestCase.java, + tests/PerformanceTest.java, tests/codeMetrics/LineCounter.java, + tests/lexerTests/AllTests.java, tests/lexerTests/LexerTests.java, + tests/lexerTests/PageIndexTests.java, + tests/lexerTests/PageTests.java, tests/lexerTests/SourceTests.java, + tests/lexerTests/StreamTests.java, + tests/nodeDecoratorTests/AllTests.java, + tests/nodeDecoratorTests/DecodingNodeTest.java, + 
tests/nodeDecoratorTests/EscapeCharacterRemovingNodeTest.java, + tests/nodeDecoratorTests/NonBreakingSpaceConvertingNodeTest.java, + tests/parserHelperTests/AllTests.java, + tests/parserHelperTests/CompositeTagScannerHelperTest.java, + tests/parserHelperTests/StringParserTest.java, + tests/scannersTests/AllTests.java, + tests/scannersTests/BaseHREFScannerTest.java, + tests/scannersTests/BodyScannerTest.java, + tests/scannersTests/BulletListScannerTest.java, + tests/scannersTests/BulletScannerTest.java, + tests/scannersTests/CompositeTagScannerTest.java, + tests/scannersTests/DivScannerTest.java, + tests/scannersTests/FormScannerTest.java, + tests/scannersTests/FrameScannerTest.java, + tests/scannersTests/FrameSetScannerTest.java, + tests/scannersTests/HeadScannerTest.java, + tests/scannersTests/HtmlTest.java, + tests/scannersTests/ImageScannerTest.java, + tests/scannersTests/InputTagScannerTest.java, + tests/scannersTests/JspScannerTest.java, + tests/scannersTests/LabelScannerTest.java, + tests/scannersTests/LinkScannerTest.java, + tests/scannersTests/MetaTagScannerTest.java, + tests/scannersTests/OptionTagScannerTest.java, + tests/scannersTests/ScriptScannerTest.java, + tests/scannersTests/SelectTagScannerTest.java, + tests/scannersTests/SpanScannerTest.java, + tests/scannersTests/StyleScannerTest.java, + tests/scannersTests/TableScannerTest.java, + tests/scannersTests/TagScannerTest.java, + tests/scannersTests/TextareaTagScannerTest.java, + tests/scannersTests/TitleScannerTest.java, + tests/scannersTests/XmlEndTagScanningTest.java, + tests/tagTests/AllTests.java, tests/tagTests/BaseHrefTagTest.java, + tests/tagTests/BodyTagTest.java, + tests/tagTests/CompositeTagTest.java, + tests/tagTests/DoctypeTagTest.java, tests/tagTests/EndTagTest.java, + tests/tagTests/FormTagTest.java, + tests/tagTests/FrameSetTagTest.java, + tests/tagTests/FrameTagTest.java, tests/tagTests/InputTagTest.java, + tests/tagTests/MetaTagTest.java, + tests/tagTests/ObjectCollectionTest.java, + 
tests/tagTests/OptionTagTest.java, + tests/tagTests/ScriptTagTest.java, + tests/tagTests/SelectTagTest.java, + tests/tagTests/StyleTagTest.java, tests/tagTests/TagTest.java, + tests/tagTests/TextareaTagTest.java, + tests/tagTests/TitleTagTest.java, + tests/temporaryFailures/AttributeParserTest.java, + tests/temporaryFailures/TagParserTest.java, + tests/utilTests/AllTests.java, tests/utilTests/BeanTest.java, + tests/utilTests/CharacterTranslationTest.java, + tests/utilTests/HTMLLinkProcessorTest.java, + tests/utilTests/HTMLParserUtilsTest.java, + tests/utilTests/HTMLTagParserTest.java, + tests/utilTests/NodeListTest.java, + tests/visitorsTests/AllTests.java, + tests/visitorsTests/CompositeTagFindingVisitorTest.java, + tests/visitorsTests/HtmlPageTest.java, + tests/visitorsTests/LinkFindingVisitorTest.java, + tests/visitorsTests/NodeVisitorTest.java, + tests/visitorsTests/StringFindingVisitorTest.java, + tests/visitorsTests/TagFindingVisitorTest.java, + tests/visitorsTests/TextExtractingVisitorTest.java, + tests/visitorsTests/UrlModifyingVisitorTest.java, + util/DefaultParserFeedback.java, util/LinkProcessor.java, + util/NodeIterator.java, util/NodeList.java, util/ParserUtils.java, + util/PeekingIterator.java, util/SimpleNodeIterator.java, + util/Translate.java, util/sort/Ordered.java, util/sort/Sort.java, + util/sort/Sortable.java, visitors/HtmlPage.java, + visitors/LinkFindingVisitor.java, visitors/NodeVisitor.java, + visitors/ObjectFindingVisitor.java, + visitors/StringFindingVisitor.java, + visitors/TagFindingVisitor.java, + visitors/TextExtractingVisitor.java, + visitors/UrlModifyingVisitor.java: + + Change tabs to spaces in all source files. 
+ + 2003-09-01 20:41 derrickoswald + + * docs/samples/index.html: + + Further to bug #786869 LinkExtractor Sample not working, + The original samples directory on the web page either needs to be revamped or removed + and the missing pieces moved to the wiki pages: + http://htmlparser.sourceforge.net/docs/index.php/SamplePrograms + + Until then I've flagged the samples directory as out of date in the index.html file. + I'll add the task to the list of requests for feature enhancements. + + 2003-09-01 18:02 derrickoswald + + * src/org/htmlparser/tests/scannersTests/AppletScannerTest.java: + + Further to bug #798554 Applet Tag does not update codebase data, fix test case. + + ******************** + Note: + AppletTag getAttribute() no longer retrieves the parameter, it returns the Tag.getAttribute() value like it should. + Use AttributeTag.getParameter() to access the list of parameters of the applet tag. + ******************** + + 2003-09-01 17:53 derrickoswald + + * src/org/htmlparser/tests/tagTests/JspTagTest.java: + + Incorporated test cases from bug #772700 Jsp Tags are not parsed correctly when in quoted attributes. + This should be resolved when the lexer package is integrated. + Commented out the one that fails. + + 2003-09-01 17:41 derrickoswald + + * src/org/htmlparser/tests/tagTests/ImageTagTest.java: + + Add test case text from bug #778781 SRC-attribute suppression in IMG-tags. + + 2003-09-01 17:28 derrickoswald + + * src/org/htmlparser/: tags/LinkTag.java, + tests/tagTests/LinkTagTest.java: + + Fix bug #784767 irc://server/channel urls are HTTPLike? + Added an isIRCLink() method, but, + I'm not sure that isHTTPLikeLink() should use the gainsaying of all known link types. + This needs review. + + 2003-09-01 16:48 derrickoswald + + * docs/samples/links.html: + + Fix bug #786869 LinkExtractor Sample not working. 
+ + 2003-09-01 16:24 derrickoswald + + * src/org/htmlparser/parserapplications/LinkExtractor.java: + + Fix bug #786869 LinkExtractor Sample not working. + + 2003-09-01 15:55 derrickoswald + + * src/org/htmlparser/: RemarkNodeParser.java, + tests/parserHelperTests/RemarkNodeParserTest.java: + + Workaround for bug #788746 parser crashes on comments like <!-- foobar --!>. + No real solution because the codebase assumes remarks end with -->, so this + just avoids the crash, but the toHtml() output will output --!-->, which isn't really correct. + Added the test case as RemarkNodeParserTest.testExclamationComment(). + + 2003-09-01 15:20 derrickoswald + + * src/org/htmlparser/Parser.java: + + Fixed bug #798553 setInputHtml does not set text. + Added ! (not) to the guard. + + 2003-09-01 15:11 derrickoswald + + * src/org/htmlparser/: tags/AppletTag.java, + tests/tagTests/AppletTagTest.java: + + Fixed bug #798554 Applet Tag does not update codebase data. + Rewrote the AppletTag class to honour setting codebase, archive, applet class and applet params. + Added 4 new test cases in AppletTagTest. + + 2003-09-01 09:53 derrickoswald + + * src/org/htmlparser/Parser.java: + + Fix bug #798552 Sample for node iterator incorrect + DocComment changes only. + Tabs converted to spaces. + + 2003-08-26 22:40 derrickoswald + + * src/org/htmlparser/tests/lexerTests/KitTest.java: + + Testing cvs keyword substitution. + Integration Build 1.4 - 20030824 -------------------------------- Index: release.txt =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/docs/release.txt,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** release.txt 24 Aug 2003 21:59:41 -0000 1.47 --- release.txt 8 Sep 2003 02:26:27 -0000 1.48 *************** *** 1,3 **** ! HTMLParser Version 1.4 (Integration Build Aug 24, 2003) ********************************************* --- 1,3 ---- ! 
HTMLParser Version 1.4 (Integration Build Sep 07, 2003) ********************************************* |
From: <der...@us...> - 2003-09-08 02:27:10
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv7654/src/org/htmlparser Modified Files: AbstractNode.java Node.java NodeReader.java Parser.java RemarkNode.java RemarkNodeParser.java StringNode.java StringNodeFactory.java package.html Log Message: Update version headers to 1.4-20030907 and update changelog. Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** AbstractNode.java 3 Sep 2003 23:36:18 -0000 1.11 --- AbstractNode.java 8 Sep 2003 02:26:28 -0000 1.12 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** Node.java 3 Sep 2003 23:36:18 -0000 1.37 --- Node.java 8 Sep 2003 02:26:28 -0000 1.38 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: NodeReader.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/NodeReader.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** NodeReader.java 3 Sep 2003 23:36:18 -0000 1.41 --- NodeReader.java 8 Sep 2003 02:26:28 -0000 1.42 *************** *** 1,3 **** ! 
// HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: Parser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Parser.java,v retrieving revision 1.58 retrieving revision 1.59 diff -C2 -d -r1.58 -r1.59 *** Parser.java 1 Sep 2003 19:20:35 -0000 1.58 --- Parser.java 8 Sep 2003 02:26:28 -0000 1.59 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // *************** *** 157,161 **** */ public final static String ! VERSION_DATE = "Aug 24, 2003" ; --- 157,161 ---- */ public final static String ! VERSION_DATE = "Sep 07, 2003" ; Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNode.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** RemarkNode.java 3 Sep 2003 23:36:18 -0000 1.28 --- RemarkNode.java 8 Sep 2003 02:26:28 -0000 1.29 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: RemarkNodeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNodeParser.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** RemarkNodeParser.java 1 Sep 2003 19:55:59 -0000 1.28 --- RemarkNodeParser.java 8 Sep 2003 02:26:28 -0000 1.29 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** StringNode.java 3 Sep 2003 23:36:18 -0000 1.36 --- StringNode.java 8 Sep 2003 02:26:28 -0000 1.37 *************** *** 1,3 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // --- 1,3 ---- ! 
// HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // Index: StringNodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNodeFactory.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** StringNodeFactory.java 3 Sep 2003 23:36:18 -0000 1.2 --- StringNodeFactory.java 8 Sep 2003 02:26:28 -0000 1.3 *************** *** 1,2 **** --- 1,30 ---- + // HTMLParser Library v1_4_20030907 - A java-based parser for HTML + // Copyright (C) Dec 31, 2000 Somik Raha + // + // This library is free software; you can redistribute it and/or + // modify it under the terms of the GNU Lesser General Public + // License as published by the Free Software Foundation; either + // version 2.1 of the License, or (at your option) any later version. + // + // This library is distributed in the hope that it will be useful, + // but WITHOUT ANY WARRANTY; without even the implied warranty of + // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + // Lesser General Public License for more details. + // + // You should have received a copy of the GNU Lesser General Public + // License along with this library; if not, write to the Free Software + // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + // + // For any questions or suggestions, you can write to me at : + // Email :so...@in... 
+ // + // Postal Address : + // Somik Raha + // Extreme Programmer & Coach + // Industrial Logic Corporation + // 2583 Cedar Street, Berkeley, + // CA 94708, USA + // Website : http://www.industriallogic.com + package org.htmlparser; Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/package.html,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** package.html 24 Aug 2003 21:59:41 -0000 1.13 --- package.html 8 Sep 2003 02:26:28 -0000 1.14 *************** *** 4,10 **** <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030824 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha --- 4,10 ---- <!-- ! @(#)package.html 1.60 98/01/27 ! HTMLParser Library v1_4_20030907 - A java-based parser for HTML Copyright (C) Dec 31, 2000 Somik Raha |
From: <der...@us...> - 2003-09-07 21:29:02
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv25784 Modified Files: Attribute.java TagNode.java Log Message: Fix setAttribute and optimize getAttribute for speed. Index: Attribute.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/Attribute.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** Attribute.java 24 Aug 2003 21:59:41 -0000 1.4 --- Attribute.java 7 Sep 2003 21:28:03 -0000 1.5 *************** *** 37,41 **** /** * An attribute within a tag. ! * <p>If Name is null, it's whitepace and Value has the text. * <p>If Name is not null, and Value is null it's a standalone attribute. * <p>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. --- 37,41 ---- /** * An attribute within a tag. ! * <p>If Name is null, it is whitepace and Value has the text. * <p>If Name is not null, and Value is null it's a standalone attribute. * <p>If Name is not null, and Value is "", and Quote is zero it's an empty attribute. *************** *** 156,159 **** --- 156,160 ---- * Get the value of the attribute. * The part after the equals sign, or the text if it's just a whitepace 'attribute'. + * <em>NOTE: This does not include any quotes that may have enclosed the value.</em> * @return The value, or <code>null</code> if it's a stand-alone attribute, * or the text if it's just a whitepace 'attribute'. *************** *** 168,171 **** --- 169,228 ---- /** + * Get the raw value of the attribute. + * The part after the equals sign, or the text if it's just a whitepace 'attribute'. + * @return The value, or <code>null</code> if it's a stand-alone attribute, + * or the text if it's just a whitepace 'attribute'. 
+ */ + public String getRawValue () + { + char quote; + StringBuffer buffer; + String ret; + + ret = getValue (); + if (null != ret && (0 != (quote = getQuote ()))) + { + buffer = new StringBuffer (ret.length() + 2); + buffer.append (quote); + buffer.append (ret); + buffer.append (quote); + ret = buffer.toString (); + } + + return (ret); + } + + /** + * Get the raw value of the attribute. + * The part after the equals sign, or the text if it's just a whitepace 'attribute'. + * @return The value, or <code>null</code> if it's a stand-alone attribute, + * or the text if it's just a whitepace 'attribute'. + */ + public void getRawValue (StringBuffer buffer) + { + char quote; + + if (null == mValue) + { + if (0 <= mValueStart) + { + if (0 != (quote = getQuote ())) + buffer.append (quote); + mPage.getText (buffer, mValueStart, mValueEnd); + if (0 != quote) + buffer.append (quote); + } + } + else + { + if (0 != (quote = getQuote ())) + buffer.append (quote); + buffer.append (mValue); + if (0 != quote) + buffer.append (quote); + } + } + + /** * Get the quote, if any, surrounding the value of the attribute, if any. * @return Either ' or " if the attribute value was quoted, or zero *************** *** 194,218 **** public void toString (StringBuffer buffer) { - String value; String name; - value = getValue (); name = getName (); if (null == name) ! { ! if (value != null) ! buffer.append (value); ! } else { buffer.append (name); ! if (null != value) { buffer.append ("="); ! if (0 != getQuote ()) ! buffer.append (getQuote ()); ! buffer.append (value); ! if (0 != getQuote ()) ! buffer.append (getQuote ()); } } --- 251,266 ---- public void toString (StringBuffer buffer) { String name; name = getName (); if (null == name) ! getRawValue (buffer); else { buffer.append (name); ! if (0 <= mValueStart) { buffer.append ("="); ! 
getRawValue (buffer); } } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** TagNode.java 3 Sep 2003 23:36:19 -0000 1.6 --- TagNode.java 7 Sep 2003 21:28:03 -0000 1.7 *************** *** 38,41 **** --- 38,42 ---- import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; + import org.htmlparser.util.Translate; /** *************** *** 121,152 **** /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter */ public String getAttribute (String name) { ! return ((String)getAttributes().get(name.toUpperCase())); } /** * Set attribute with given key, value pair. ! * @param key ! * @param value */ ! public void setAttribute(String key, String value) { ! getAttributes ().put(key,value); } /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter * @deprecated use getAttribute instead */ ! public String getParameter(String name) { ! return (String)getAttributes().get (name.toUpperCase()); } --- 122,293 ---- /** ! * Create a tag with the location and attributes provided ! * @param page The page this tag was read from. ! * @param start The starting offset of this node within the page. ! * @param end The ending offset of this node within the page. ! * @param attributes The list of attributes that were parsed in this tag. ! * @see Attribute ! */ ! public TagNode () ! { ! super (null, -1, -1); ! mAttributes = new Vector (); ! } ! ! /** ! * Returns the value of an attribute. ! * @param name Name of attribute, case insensitive. ! * @return The value associated with the attribute or null if it does ! * not exist, or is a stand-alone or */ public String getAttribute (String name) { ! 
Vector attributes; ! int size; ! Attribute attribute; ! String string; ! String ret; ! ! ret = null; ! ! attributes = getAttributesEx (); ! if (name.equalsIgnoreCase (TAGNAME)) ! ret = ((Attribute)attributes.elementAt (0)).getName (); ! else ! { ! size = attributes.size (); ! for (int i = 1; i < size; i++) ! { ! attribute = (Attribute)attributes.elementAt (i); ! string = attribute.getName (); ! if ((null != string) && name.equalsIgnoreCase (string)) ! { ! ret = attribute.getValue (); ! i = size; // exit fast ! } ! } ! } ! ! return (ret); } /** * Set attribute with given key, value pair. ! * Figures out a quote character to use if necessary. ! * @param key The name of the attribute. ! * @param value The value of the attribute. */ ! public void setAttribute (String key, String value) { ! char ch; ! boolean needed; ! boolean singleq; ! boolean doubleq; ! String ref; ! StringBuffer buffer; ! char quote; ! ! // first determine if there's whitespace in the value ! // and while we'return at it find a suitable quote character ! needed = false; ! singleq = true; ! doubleq = true; ! for (int i = 0; i < value.length (); i++) ! { ! ch = value.charAt (i); ! if (Character.isWhitespace (ch)) ! needed = true; ! else if ('\'' == ch) ! singleq = false; ! else if ('"' == ch) ! doubleq = false; ! } ! ! // now apply quoting ! if (needed) ! { ! if (doubleq) ! quote = '"'; ! else if (singleq) ! quote = '\''; ! else ! { ! // uh-oh, we need to convert some quotes into character references ! // convert all double quotes into " ! quote = '"'; ! ref = Translate.convertToString (quote); ! // JDK 1.4: value = value.replaceAll ("\"", ref); ! buffer = new StringBuffer (value.length() * 5); ! for (int i = 0; i < value.length (); i++) ! { ! ch = value.charAt (i); ! if ('"' == ch) ! buffer.append (ref); ! else ! buffer.append (ch); ! } ! value = buffer.toString (); ! } ! } ! else ! quote = 0; ! setAttribute (key, value, quote); } /** ! 
* Set attribute with given key, value pair where the value is quoted by quote. ! * @param key The name of the attribute. ! * @param value The value of the attribute. ! * @param quote The quote character to be used around value. ! * If zero, it is an unquoted value. ! */ ! public void setAttribute (String key, String value, char quote) ! { ! setAttribute (new Attribute (key, value, quote)); ! } ! ! /** ! * Set an attribute. ! * This replaces an attribute of the same name. ! * To set the zeroth attribute (the tag name), use setTagName(). ! * @param attribute The attribute to set. ! */ ! public void setAttribute (Attribute attribute) ! { ! boolean replaced; ! Vector attributes; ! String name; ! Attribute test; ! String test_name; ! ! replaced = false; ! attributes = getAttributesEx (); ! if (0 < attributes.size ()) ! { ! name = attribute.getName (); ! for (int i = 1; i < attributes.size (); i++) ! { ! test = (Attribute)attributes.elementAt (i); ! test_name = test.getName (); ! if (null != test_name) ! if (test_name.equalsIgnoreCase (name)) ! { ! attributes.setElementAt (attribute, i); ! replaced = true; ! } ! } ! } ! if (!replaced) ! attributes.addElement (attribute); ! } ! ! /** ! * Eqivalent to <code>getAttribute (name)</code>. ! * @param name Name of attribute. * @deprecated use getAttribute instead */ ! public String getParameter (String name) { ! return (getAttribute (name)); } *************** *** 158,162 **** * @return Returns a special hashtable of attributes in two element String arrays. */ ! public Vector getAttributesEx() { return mAttributes; --- 299,303 ---- * @return Returns a special hashtable of attributes in two element String arrays. */ ! public Vector getAttributesEx () { return mAttributes; *************** *** 165,171 **** /** * Gets the attributes in the tag. ! * @return Returns a Hashtable of attributes */ ! public Hashtable getAttributes() { Vector attributes; --- 306,312 ---- /** * Gets the attributes in the tag. ! 
* @return Returns a Hashtable of attributes. */ ! public Hashtable getAttributes () { Vector attributes; *************** *** 188,213 **** if (null != attribute.getName ()) { ! value = attribute.getValue (); ! if ('\'' == attribute.getQuote ()) ! { ! _value = new StringBuffer (value.length () + 2); ! _value.append ("'"); ! _value.append (value); ! _value.append ("'"); ! value = _value.toString (); ! } ! else if ('"' == attribute.getQuote ()) { ! _value = new StringBuffer (value.length () + 2); ! _value.append ("\""); ! _value.append (value); ! _value.append ("\""); ! value = _value.toString (); } - else if ((null != value) && value.equals ("")) - value = NOTHING; if (null == value) value = NULLVALUE; ! ret.put (attribute.getName (), value); } } --- 329,343 ---- if (null != attribute.getName ()) { ! if (0 != attribute.getQuote ()) ! value = attribute.getRawValue (); ! else { ! value = attribute.getValue (); ! if ((null != value) && value.equals ("")) ! value = NOTHING; } if (null == value) value = NULLVALUE; ! ret.put (attribute.getName ().toUpperCase (), value); } } *************** *** 219,230 **** } ! public String getTagName(){ ! return getParameter(TAGNAME); } /** ! * Return the text contained in this tag */ ! public String getText() { return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); --- 349,402 ---- } ! /** ! * Return the name of this tag. ! * <p> ! * <em> ! * Note: This value is converted to uppercase. ! * To get at the original case version of the tag name use: ! * <pre> ! * getAttribute (TagNode.TAGNAME); ! * </pre> ! * </em> ! * @return The tag name. ! */ ! public String getTagName () ! { ! return (getAttribute (TAGNAME).toUpperCase ()); } /** ! * Set the name of this tag. ! * This creates or replaces the first attribute of the tag (the ! * zeroth element of the attribute vector). ! * @param name The tag name. */ ! public void setTagName (String name) ! { ! Attribute attribute; ! Vector attributes; ! Attribute zeroth; ! ! 
attribute = new Attribute (name, null, (char)0); ! attributes = getAttributesEx (); ! if (0 == attributes.size ()) ! // nothing added yet ! attributes.addElement (attribute); ! else ! { ! zeroth = (Attribute)attributes.elementAt (0); ! // check forn attribute that looks like a name ! if ((null == zeroth.getValue ()) && (0 == zeroth.getQuote ())) ! attributes.setElementAt (attribute, 0); ! else ! attributes.insertElementAt (attribute, 0); ! } ! } ! ! /** ! * Return the text contained in this tag. ! * @return The complete contents of the tag (within the angle brackets). ! */ ! public String getText () { return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); *************** *** 282,287 **** * @param tagBegin The nodeBegin to set */ ! public void setTagBegin(int tagBegin) { ! this.nodeBegin = tagBegin; } --- 454,460 ---- * @param tagBegin The nodeBegin to set */ ! public void setTagBegin (int tagBegin) ! { ! nodeBegin = tagBegin; } *************** *** 290,294 **** * @return The nodeBegin value. */ ! public int getTagBegin() { return (nodeBegin); } --- 463,468 ---- * @return The nodeBegin value. */ ! public int getTagBegin () ! { return (nodeBegin); } *************** *** 298,303 **** * @param tagEnd The nodeEnd to set */ ! public void setTagEnd(int tagEnd) { ! this.nodeEnd = tagEnd; } --- 472,478 ---- * @param tagEnd The nodeEnd to set */ ! public void setTagEnd (int tagEnd) ! { ! nodeEnd = tagEnd; } *************** *** 306,310 **** * @return The nodeEnd value. */ ! public int getTagEnd() { return (nodeEnd); } --- 481,486 ---- * @return The nodeEnd value. */ ! public int getTagEnd () ! { return (nodeEnd); } *************** *** 323,328 **** } ! public String toPlainTextString() { ! return EMPTY_STRING; } --- 499,505 ---- } ! public String toPlainTextString () ! { ! return (EMPTY_STRING); } *************** *** 331,335 **** * @see org.htmlparser.Node#toHtml() */ ! public String toHtml() { StringBuffer ret; --- 508,512 ---- * @see org.htmlparser.Node#toHtml() */ ! 
public String toHtml () { StringBuffer ret; *************** *** 362,366 **** * Print the contents of the tag */ ! public String toString() { String tag; --- 539,543 ---- * Print the contents of the tag */ ! public String toString () { String tag; *************** *** 395,399 **** * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) { } --- 572,576 ---- * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto (NodeList collectionList, String filter) { } *************** *** 404,408 **** * @deprecated This method is deprecated. Use getAttributes() instead. */ ! public Hashtable getParsed() { return getAttributes (); } --- 581,586 ---- * @deprecated This method is deprecated. Use getAttributes() instead. */ ! public Hashtable getParsed () ! { return getAttributes (); } *************** *** 417,421 **** * @return Hashtable */ ! public Hashtable redoParseAttributes() { mAttributes = null; --- 595,599 ---- * @return Hashtable */ ! public Hashtable redoParseAttributes () { mAttributes = null; *************** *** 424,431 **** } ! public void accept(Object visitor) { } ! public String getType() { return TYPE; } --- 602,611 ---- } ! public void accept (Object visitor) ! { } ! public String getType () ! { return TYPE; } *************** *** 436,444 **** * @return boolean */ ! public boolean isEmptyXmlTag() { return emptyXmlTag; } ! public void setEmptyXmlTag(boolean emptyXmlTag) { this.emptyXmlTag = emptyXmlTag; } --- 616,626 ---- * @return boolean */ ! public boolean isEmptyXmlTag () ! { return emptyXmlTag; } ! public void setEmptyXmlTag (boolean emptyXmlTag) ! { this.emptyXmlTag = emptyXmlTag; } |
From: <der...@us...> - 2003-09-03 23:38:03
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1:/tmp/cvs-serv31228/util Modified Files: DefaultParserFeedback.java LinkProcessor.java NodeIterator.java NodeList.java ParserUtils.java PeekingIterator.java SimpleNodeIterator.java Translate.java Log Message: Change tabs to spaces in all source files. Index: DefaultParserFeedback.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/DefaultParserFeedback.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** DefaultParserFeedback.java 24 Aug 2003 21:59:44 -0000 1.23 --- DefaultParserFeedback.java 3 Sep 2003 23:36:22 -0000 1.24 *************** *** 69,73 **** * </pre> */ ! protected int mode; /** --- 69,73 ---- * </pre> */ ! protected int mMode; /** *************** *** 79,88 **** * QUIET - no messages * </pre> */ public DefaultParserFeedback (int mode) { ! if (mode<QUIET||mode>DEBUG) ! throw new IllegalArgumentException ("illegal mode (" + mode + "), must be one of: QUIET, NORMAL, DEBUG"); ! this.mode = mode; } --- 79,93 ---- * QUIET - no messages * </pre> + * @exception IllegalArgumentException if mode is not + * QUIET, NORMAL or DEBUG. */ public DefaultParserFeedback (int mode) { ! if (mode<QUIET||mode>DEBUG) ! throw new IllegalArgumentException ( ! "illegal mode (" ! + mode ! + "), must be one of: QUIET, NORMAL, DEBUG"); ! mMode = mode; } *************** *** 101,105 **** public void info (String message) { ! if (mode!=QUIET) System.out.println ("INFO: " + message); } --- 106,110 ---- public void info (String message) { ! if (QUIET != mMode) System.out.println ("INFO: " + message); } *************** *** 111,115 **** public void warning (String message) { ! if (mode!=QUIET) System.out.println ("WARNING: " + message); } --- 116,120 ---- public void warning (String message) { ! 
if (QUIET != mMode) System.out.println ("WARNING: " + message); } *************** *** 122,129 **** public void error (String message, ParserException exception) { ! if (mode!=QUIET) { System.out.println ("ERROR: " + message); ! if (mode == DEBUG && (exception!=null)) exception.printStackTrace (); } --- 127,134 ---- public void error (String message, ParserException exception) { ! if (QUIET != mMode) { System.out.println ("ERROR: " + message); ! if (DEBUG == mMode && (null != exception)) exception.printStackTrace (); } Index: LinkProcessor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/LinkProcessor.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** LinkProcessor.java 24 Aug 2003 21:59:45 -0000 1.23 --- LinkProcessor.java 3 Sep 2003 23:36:22 -0000 1.24 *************** *** 76,80 **** else { ! URL url = constructUrl(link, base); ret = url.toExternalForm (); } --- 76,80 ---- else { ! URL url = constructUrl(link, base); ret = url.toExternalForm (); } *************** *** 88,124 **** } ! public URL constructUrl(String link, String base) ! throws MalformedURLException { ! String path; ! boolean modified; ! boolean absolute; ! int index; ! URL url; // constructed URL combining relative link and base ! url = new URL (new URL (base), link); ! path = url.getFile (); ! modified = false; ! absolute = link.startsWith ("/"); ! if (!absolute) { // we prefer to fix incorrect relative links ! // this doesn't fix them all, just the ones at the start ! while (path.startsWith ("/.")) { ! if (path.startsWith ("/../")) { ! path = path.substring (3); ! modified = true; ! } ! else if (path.startsWith ("/./") || path.startsWith("/.")) { ! path = path.substring (2); ! modified = true; ! } else break; ! } ! } ! // fix backslashes ! while (-1 != (index = path.indexOf ("/\\"))) { ! path = path.substring (0, index + 1) + path.substring (index + 2); ! modified = true; ! 
} ! if (modified) ! url = new URL (url, path); ! return url; ! } /** --- 88,124 ---- } ! public URL constructUrl(String link, String base) ! throws MalformedURLException { ! String path; ! boolean modified; ! boolean absolute; ! int index; ! URL url; // constructed URL combining relative link and base ! url = new URL (new URL (base), link); ! path = url.getFile (); ! modified = false; ! absolute = link.startsWith ("/"); ! if (!absolute) { // we prefer to fix incorrect relative links ! // this doesn't fix them all, just the ones at the start ! while (path.startsWith ("/.")) { ! if (path.startsWith ("/../")) { ! path = path.substring (3); ! modified = true; ! } ! else if (path.startsWith ("/./") || path.startsWith("/.")) { ! path = path.substring (2); ! modified = true; ! } else break; ! } ! } ! // fix backslashes ! while (-1 != (index = path.indexOf ("/\\"))) { ! path = path.substring (0, index + 1) + path.substring (index + 2); ! modified = true; ! } ! if (modified) ! url = new URL (url, path); ! return url; ! } /** *************** *** 194,207 **** } ! public static String removeLastSlash(String baseUrl) { ! if(baseUrl.charAt(baseUrl.length()-1)=='/') ! { ! return baseUrl.substring(0,baseUrl.length()-1); ! } ! else ! { ! return baseUrl; ! } ! } } --- 194,207 ---- } ! public static String removeLastSlash(String baseUrl) { ! if(baseUrl.charAt(baseUrl.length()-1)=='/') ! { ! return baseUrl.substring(0,baseUrl.length()-1); ! } ! else ! { ! return baseUrl; ! } ! } } Index: NodeIterator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeIterator.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** NodeIterator.java 24 Aug 2003 21:59:45 -0000 1.24 --- NodeIterator.java 3 Sep 2003 23:36:22 -0000 1.25 *************** *** 36,40 **** * @return <code>true</code> if a call to <code>nextHTMLNode()</code> will succeed. */ ! 
public boolean hasMoreNodes() throws ParserException; /** --- 36,40 ---- * @return <code>true</code> if a call to <code>nextHTMLNode()</code> will succeed. */ ! public boolean hasMoreNodes() throws ParserException; /** *************** *** 42,46 **** * @return The next node in the HTML stream, or null if there are no more nodes. */ ! public Node nextNode() throws ParserException; ! } --- 42,46 ---- * @return The next node in the HTML stream, or null if there are no more nodes. */ ! public Node nextNode() throws ParserException; ! } Index: NodeList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** NodeList.java 24 Aug 2003 21:59:45 -0000 1.40 --- NodeList.java 3 Sep 2003 23:36:22 -0000 1.41 *************** *** 35,59 **** public class NodeList implements Serializable { ! private static final int INITIAL_CAPACITY=10; ! //private static final int CAPACITY_INCREMENT=20; ! private Node nodeData[]; ! private int size; ! private int capacity; ! private int capacityIncrement; ! private int numberOfAdjustments; ! ! public NodeList() { ! size = 0; ! capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor(capacity); ! capacityIncrement = capacity*2; ! numberOfAdjustments = 0; ! } ! ! public void add(Node node) { ! if (size==capacity) ! adjustVectorCapacity(); ! nodeData[size++]=node; ! } /** --- 35,59 ---- public class NodeList implements Serializable { ! private static final int INITIAL_CAPACITY=10; ! //private static final int CAPACITY_INCREMENT=20; ! private Node nodeData[]; ! private int size; ! private int capacity; ! private int capacityIncrement; ! private int numberOfAdjustments; ! ! public NodeList() { ! size = 0; ! capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor(capacity); ! capacityIncrement = capacity*2; ! numberOfAdjustments = 0; ! } ! ! public void add(Node node) { ! 
if (size==capacity) ! adjustVectorCapacity(); ! nodeData[size++]=node; ! } /** *************** *** 61,69 **** * @param list The list to add. */ ! public void add (NodeList list) { for (int i = 0; i < list.size; i++) add (list.nodeData[i]); ! } /** --- 61,69 ---- * @param list The list to add. */ ! public void add (NodeList list) { for (int i = 0; i < list.size; i++) add (list.nodeData[i]); ! } /** *************** *** 71,166 **** * @param node The new first element. */ ! public void prepend(Node node) { ! if (size==capacity) ! adjustVectorCapacity(); System.arraycopy (nodeData, 0, nodeData, 1, size); size++; ! nodeData[0]=node; ! } ! private void adjustVectorCapacity() { ! capacity += capacityIncrement; ! capacityIncrement *= 2; ! Node oldData [] = nodeData; ! nodeData = newNodeArrayFor(capacity); ! System.arraycopy(oldData, 0, nodeData, 0, size); ! numberOfAdjustments++; ! } ! private Node[] newNodeArrayFor(int capacity) { ! return new Node[capacity]; ! } ! ! public int size() { ! return size; ! } ! ! public Node elementAt(int i) { ! return nodeData[i]; ! } ! public int getNumberOfAdjustments() { ! return numberOfAdjustments; ! } ! ! public SimpleNodeIterator elements() { ! return new SimpleNodeIterator() { ! int count = 0; ! ! public boolean hasMoreNodes() { ! return count < size; ! } ! ! public Node nextNode() { ! synchronized (NodeList.this) { ! if (count < size) { ! return nodeData[count++]; ! } ! } ! throw new NoSuchElementException("Vector Enumeration"); ! } ! }; ! } ! ! public Node [] toNodeArray() { ! Node [] nodeArray = newNodeArrayFor(size); ! System.arraycopy(nodeData, 0, nodeArray, 0, size); ! return nodeArray; ! } ! ! public String asString() { ! StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toPlainTextString()); ! return buff.toString(); ! } ! ! public String asHtml() { ! StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toHtml()); ! return buff.toString(); ! } ! 
! public void remove(int index) { ! System.arraycopy(nodeData, index+1, nodeData, index, size-index-1); ! size--; ! } ! ! public void removeAll() { ! size = 0; ! capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor(capacity); ! capacityIncrement = capacity*2; ! numberOfAdjustments = 0; ! } ! ! public String toString() { ! StringBuffer text = new StringBuffer(); ! for (int i=0;i<size;i++) ! text.append(nodeData[i].toPlainTextString()); ! return text.toString(); ! } /** --- 71,166 ---- * @param node The new first element. */ ! public void prepend(Node node) { ! if (size==capacity) ! adjustVectorCapacity(); System.arraycopy (nodeData, 0, nodeData, 1, size); size++; ! nodeData[0]=node; ! } ! private void adjustVectorCapacity() { ! capacity += capacityIncrement; ! capacityIncrement *= 2; ! Node oldData [] = nodeData; ! nodeData = newNodeArrayFor(capacity); ! System.arraycopy(oldData, 0, nodeData, 0, size); ! numberOfAdjustments++; ! } ! private Node[] newNodeArrayFor(int capacity) { ! return new Node[capacity]; ! } ! ! public int size() { ! return size; ! } ! ! public Node elementAt(int i) { ! return nodeData[i]; ! } ! public int getNumberOfAdjustments() { ! return numberOfAdjustments; ! } ! ! public SimpleNodeIterator elements() { ! return new SimpleNodeIterator() { ! int count = 0; ! ! public boolean hasMoreNodes() { ! return count < size; ! } ! ! public Node nextNode() { ! synchronized (NodeList.this) { ! if (count < size) { ! return nodeData[count++]; ! } ! } ! throw new NoSuchElementException("Vector Enumeration"); ! } ! }; ! } ! ! public Node [] toNodeArray() { ! Node [] nodeArray = newNodeArrayFor(size); ! System.arraycopy(nodeData, 0, nodeArray, 0, size); ! return nodeArray; ! } ! ! public String asString() { ! StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toPlainTextString()); ! return buff.toString(); ! } ! ! public String asHtml() { ! StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! 
buff.append(nodeData[i].toHtml()); ! return buff.toString(); ! } ! ! public void remove(int index) { ! System.arraycopy(nodeData, index+1, nodeData, index, size-index-1); ! size--; ! } ! ! public void removeAll() { ! size = 0; ! capacity = INITIAL_CAPACITY; ! nodeData = newNodeArrayFor(capacity); ! capacityIncrement = capacity*2; ! numberOfAdjustments = 0; ! } ! ! public String toString() { ! StringBuffer text = new StringBuffer(); ! for (int i=0;i<size;i++) ! text.append(nodeData[i].toPlainTextString()); ! return text.toString(); ! } /** *************** *** 168,175 **** * @param classType The class to search for. */ ! public NodeList searchFor (Class classType) { return (searchFor (classType, false)); ! } /** --- 168,175 ---- * @param classType The class to search for. */ ! public NodeList searchFor (Class classType) { return (searchFor (classType, false)); ! } /** Index: ParserUtils.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** ParserUtils.java 24 Aug 2003 21:59:45 -0000 1.26 --- ParserUtils.java 3 Sep 2003 23:36:22 -0000 1.27 *************** *** 38,120 **** public class ParserUtils { ! ! public static String toString(Tag tag) { ! String tagName = tag.getAttribute(Tag.TAGNAME); ! Hashtable attrs = tag.getAttributes(); ! StringBuffer lString = new StringBuffer(tagName); ! lString.append(" TAG\n"); ! lString.append("--------\n"); ! for (Enumeration e = attrs.keys(); e.hasMoreElements();) { ! String key = (String) e.nextElement(); ! String value = (String) attrs.get(key); ! if (!key.equalsIgnoreCase(Tag.TAGNAME) && value.length() > 0) ! lString.append(key).append(" : ").append(value).append("\n"); ! } ! return lString.toString(); ! } ! public static Map adjustScanners(NodeReader reader) { ! Map tempScanners = new Hashtable(); ! tempScanners = reader.getParser().getScanners(); ! 
// Remove all existing scanners ! reader.getParser().flushScanners(); ! return tempScanners; ! } ! ! public static void restoreScanners(NodeReader reader, Map tempScanners) { ! // Flush the scanners ! reader.getParser().setScanners(tempScanners); ! } ! public static String removeChars(String s, char occur) { ! StringBuffer newString = new StringBuffer(); ! char ch; ! for (int i = 0; i < s.length(); i++) { ! ch = s.charAt(i); ! if (ch != occur) ! newString.append(ch); ! } ! return newString.toString(); ! } ! public static String removeEscapeCharacters(String inputString) { ! inputString = ParserUtils.removeChars(inputString, '\r'); ! inputString = ParserUtils.removeChars(inputString, '\n'); ! inputString = ParserUtils.removeChars(inputString, '\t'); ! return inputString; ! } ! public static String removeLeadingBlanks(String plainText) { ! while (plainText.indexOf(' ') == 0) ! plainText = plainText.substring(1); ! return plainText; ! } ! public static String removeTrailingBlanks(String text) { ! char ch = ' '; ! while (ch == ' ') { ! ch = text.charAt(text.length() - 1); ! if (ch == ' ') ! text = text.substring(0, text.length() - 1); ! } ! return text; ! } ! /** ! * Search given node and pick up any objects of given type, return ! * Node array. ! * @param node ! * @param type ! * @return Node[] ! */ ! public static Node[] findTypeInNode(Node node, Class type) { ! NodeList nodeList = new NodeList(); ! node.collectInto(nodeList, type); ! Node spans[] = nodeList.toNodeArray(); ! return spans; ! } } --- 38,120 ---- public class ParserUtils { ! ! public static String toString(Tag tag) { ! String tagName = tag.getAttribute(Tag.TAGNAME); ! Hashtable attrs = tag.getAttributes(); ! StringBuffer lString = new StringBuffer(tagName); ! lString.append(" TAG\n"); ! lString.append("--------\n"); ! for (Enumeration e = attrs.keys(); e.hasMoreElements();) { ! String key = (String) e.nextElement(); ! String value = (String) attrs.get(key); ! 
if (!key.equalsIgnoreCase(Tag.TAGNAME) && value.length() > 0) ! lString.append(key).append(" : ").append(value).append("\n"); ! } ! return lString.toString(); ! } ! public static Map adjustScanners(NodeReader reader) { ! Map tempScanners = new Hashtable(); ! tempScanners = reader.getParser().getScanners(); ! // Remove all existing scanners ! reader.getParser().flushScanners(); ! return tempScanners; ! } ! ! public static void restoreScanners(NodeReader reader, Map tempScanners) { ! // Flush the scanners ! reader.getParser().setScanners(tempScanners); ! } ! public static String removeChars(String s, char occur) { ! StringBuffer newString = new StringBuffer(); ! char ch; ! for (int i = 0; i < s.length(); i++) { ! ch = s.charAt(i); ! if (ch != occur) ! newString.append(ch); ! } ! return newString.toString(); ! } ! public static String removeEscapeCharacters(String inputString) { ! inputString = ParserUtils.removeChars(inputString, '\r'); ! inputString = ParserUtils.removeChars(inputString, '\n'); ! inputString = ParserUtils.removeChars(inputString, '\t'); ! return inputString; ! } ! public static String removeLeadingBlanks(String plainText) { ! while (plainText.indexOf(' ') == 0) ! plainText = plainText.substring(1); ! return plainText; ! } ! public static String removeTrailingBlanks(String text) { ! char ch = ' '; ! while (ch == ' ') { ! ch = text.charAt(text.length() - 1); ! if (ch == ' ') ! text = text.substring(0, text.length() - 1); ! } ! return text; ! } ! /** ! * Search given node and pick up any objects of given type, return ! * Node array. ! * @param node ! * @param type ! * @return Node[] ! */ ! public static Node[] findTypeInNode(Node node, Class type) { ! NodeList nodeList = new NodeList(); ! node.collectInto(nodeList, type); ! Node spans[] = nodeList.toNodeArray(); ! return spans; ! 
} } Index: PeekingIterator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/PeekingIterator.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** PeekingIterator.java 24 Aug 2003 21:59:45 -0000 1.12 --- PeekingIterator.java 3 Sep 2003 23:36:22 -0000 1.13 *************** *** 32,57 **** public interface PeekingIterator extends NodeIterator{ ! /** ! * Fetch a node without consuming it. ! * Subsequent calls to <code>peek()</code> will return subsequent nodes. ! * The node returned by <code>peek()</code> will never be a node already ! * consumed by <code>nextHTMLNode()</code>.<p> ! * For example, say there are nodes <H1><H2><H3><H4><H5>, ! * this is the nodes that would be returned for the indicated calls: ! * <pre> ! * peek() H1 ! * peek() H2 ! * nextHTMLNode() H1 ! * peek() H3 ! * nextHTMLNode() H2 ! * nextHTMLNode() H3 ! * nextHTMLNode() H4 ! * peek() H5 ! * </pre> ! * @return The next node that would be returned by <code>nextHTMLNode()</code> ! * or the node after the last node returned by <code>peek()</code>, whichever ! * is later in the stream. or null if there are no more nodes available via ! * the above rules. ! */ ! public Node peek () throws ParserException; } --- 32,57 ---- public interface PeekingIterator extends NodeIterator{ ! /** ! * Fetch a node without consuming it. ! * Subsequent calls to <code>peek()</code> will return subsequent nodes. ! * The node returned by <code>peek()</code> will never be a node already ! * consumed by <code>nextHTMLNode()</code>.<p> ! * For example, say there are nodes <H1><H2><H3><H4><H5>, ! * this is the nodes that would be returned for the indicated calls: ! * <pre> ! * peek() H1 ! * peek() H2 ! * nextHTMLNode() H1 ! * peek() H3 ! * nextHTMLNode() H2 ! * nextHTMLNode() H3 ! * nextHTMLNode() H4 ! * peek() H5 ! * </pre> ! * @return The next node that would be returned by <code>nextHTMLNode()</code> ! 
* or the node after the last node returned by <code>peek()</code>, whichever ! * is later in the stream. or null if there are no more nodes available via ! * the above rules. ! */ ! public Node peek () throws ParserException; } Index: SimpleNodeIterator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/SimpleNodeIterator.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** SimpleNodeIterator.java 24 Aug 2003 21:59:45 -0000 1.26 --- SimpleNodeIterator.java 3 Sep 2003 23:36:22 -0000 1.27 *************** *** 40,55 **** public interface SimpleNodeIterator extends NodeIterator { ! /** ! * Check if more nodes are available. ! * @return <code>true</code> if a call to <code>nextHTMLNode()</code> will ! * succeed. ! */ ! public boolean hasMoreNodes(); ! /** ! * Get the next node. ! * @return The next node in the HTML stream, or null if there are no more ! * nodes. ! */ ! public Node nextNode(); } --- 40,55 ---- public interface SimpleNodeIterator extends NodeIterator { ! /** ! * Check if more nodes are available. ! * @return <code>true</code> if a call to <code>nextHTMLNode()</code> will ! * succeed. ! */ ! public boolean hasMoreNodes(); ! /** ! * Get the next node. ! * @return The next node in the HTML stream, or null if there are no more ! * nodes. ! */ ! public Node nextNode(); } Index: Translate.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Translate.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** Translate.java 24 Aug 2003 21:59:45 -0000 1.32 --- Translate.java 3 Sep 2003 23:36:22 -0000 1.33 *************** *** 472,476 **** public static String decode (StringBuffer stringBuffer) { ! return decode(stringBuffer.toString()); } --- 472,476 ---- public static String decode (StringBuffer stringBuffer) { ! 
return decode(stringBuffer.toString()); } |
From: <der...@us...> - 2003-09-03 23:38:03
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort In directory sc8-pr-cvs1:/tmp/cvs-serv31228/util/sort Modified Files: Ordered.java Sort.java Sortable.java Log Message: Change tabs to spaces in all source files. Index: Ordered.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/Ordered.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Ordered.java 24 Aug 2003 21:59:45 -0000 1.3 --- Ordered.java 3 Sep 2003 23:36:22 -0000 1.4 *************** *** 1,88 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by ! // Derrick Oswald ! // ! ! package org.htmlparser.util.sort; ! ! /** ! * Describes an object that knows about ordering. ! * Implementors must have a comparison function, ! 
* which imposes a partial ordering on some ! * collection of objects. Ordered objects can be passed to a ! * sort method (such as org.htmlparser.util.sort.Sort) to allow precise control ! * over the sort order. ! * <p> ! * An set of elements S is partially ordered ! * if and only if <code>e1.compare(e2)==0</code> implies that ! * <code>e1.equals(e2)</code> for every e1 and e2 in S. ! * <p> ! * This all goes away in JDK 1.2. ! * <p> ! * For use with java.lang.Comparable from JDK 1.2: ! * <pre> ! * public int compare (Object o1, Object o2) ! * { ! * return (((Ordered)o1).compare (o2)); ! * } ! * </pre> ! * @see Sort ! */ ! public interface Ordered ! { ! /** ! * Compares this object with another for order. ! * Returns a negative integer, zero, or a positive integer ! * as this object is less than, equal to, or greater ! * than the second. ! * <p> ! * The implementor must ensure that ! * <code>sgn(x.compare(y)) == -sgn(y.compare(x))</code> ! * for all x and y. (This implies that <code>x.compare(y)</code> ! * must throw an exception if and only if <code>y.compare(x)</code> ! * throws an exception.) ! * <p> ! * The implementor must also ensure that the relation is transitive: ! * <code>((x.compare(y)>0) && (y.compare(z)>0))</code> ! * implies <code>x.compare(z)>0</code>. ! * <p> ! * Finally, the implementer must ensure that ! * <code>x.compare(y)==0</code> implies that ! * <code>sgn(x.compare(z))==sgn(y.compare(z))</code> ! * for all z. ! * @param that The object to compare this object against. ! * @return A negative integer, zero, or a positive ! * integer as this object is less than, equal to, ! * or greater than the second. ! * @exception ClassCastException The arguments type prevents it ! * from being compared by this Ordered. ! */ ! public int compare (Object that); ! } --- 1,88 ---- ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! 
// modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by ! // Derrick Oswald ! // ! ! package org.htmlparser.util.sort; ! ! /** ! * Describes an object that knows about ordering. ! * Implementors must have a comparison function, ! * which imposes a partial ordering on some ! * collection of objects. Ordered objects can be passed to a ! * sort method (such as org.htmlparser.util.sort.Sort) to allow precise control ! * over the sort order. ! * <p> ! * An set of elements S is partially ordered ! * if and only if <code>e1.compare(e2)==0</code> implies that ! * <code>e1.equals(e2)</code> for every e1 and e2 in S. ! * <p> ! * This all goes away in JDK 1.2. ! * <p> ! * For use with java.lang.Comparable from JDK 1.2: ! * <pre> ! * public int compare (Object o1, Object o2) ! * { ! * return (((Ordered)o1).compare (o2)); ! * } ! * </pre> ! * @see Sort ! */ ! public interface Ordered ! { ! /** ! * Compares this object with another for order. ! * Returns a negative integer, zero, or a positive integer ! 
* as this object is less than, equal to, or greater ! * than the second. ! * <p> ! * The implementor must ensure that ! * <code>sgn(x.compare(y)) == -sgn(y.compare(x))</code> ! * for all x and y. (This implies that <code>x.compare(y)</code> ! * must throw an exception if and only if <code>y.compare(x)</code> ! * throws an exception.) ! * <p> ! * The implementor must also ensure that the relation is transitive: ! * <code>((x.compare(y)>0) && (y.compare(z)>0))</code> ! * implies <code>x.compare(z)>0</code>. ! * <p> ! * Finally, the implementer must ensure that ! * <code>x.compare(y)==0</code> implies that ! * <code>sgn(x.compare(z))==sgn(y.compare(z))</code> ! * for all z. ! * @param that The object to compare this object against. ! * @return A negative integer, zero, or a positive ! * integer as this object is less than, equal to, ! * or greater than the second. ! * @exception ClassCastException The arguments type prevents it ! * from being compared by this Ordered. ! */ ! public int compare (Object that); ! } Index: Sort.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/Sort.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Sort.java 24 Aug 2003 21:59:45 -0000 1.3 --- Sort.java 3 Sep 2003 23:36:22 -0000 1.4 *************** *** 1,501 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of [...973 lines suppressed...] ! num = half; ! } ! } ! if (-1 == ret) ! ret = lo; ! ! 
return (ret); ! } ! ! /** ! * Binary search for an object ! * @param ref The name to search for. ! * @return The index at which reference was found or is to be inserted. ! */ ! public static int bsearch (Vector vector, Ordered ref) ! { ! return (bsearch (vector, ref, 0, vector.size () - 1)); ! } ! } ! Index: Sortable.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/Sortable.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** Sortable.java 24 Aug 2003 21:59:45 -0000 1.3 --- Sortable.java 3 Sep 2003 23:36:22 -0000 1.4 *************** *** 1,76 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by ! // Derrick Oswald ! // ! ! package org.htmlparser.util.sort; ! ! /** ! 
* Provides a mechanism to abstract the sort process. ! * Classes implementing this interface are collections of Ordered objects ! * that are to be sorted by the Sort class and are ! * not necessarily Vectors or Arrays of Ordered objects. ! * @see Sort ! */ ! public interface Sortable ! { ! /** ! * Returns the first index of the Sortable. ! * @return The index of the first element. ! */ ! public int first (); ! ! /** ! * Returns the last index of the Sortable. ! * @return The index of the last element. ! * If this were an array object this would be (object.length - 1). ! */ ! public int last (); ! ! /** ! * Fetch the object at the given index. ! * @param index The item number to get. ! * @param reuse If this argument is not null, it is an object ! * acquired from a previous fetch that is no longer needed and ! * may be returned as the result if it makes mores sense to alter ! * and return it than to fetch or create a new element. That is, the ! * reuse object is garbage and may be used to avoid allocating a new ! * object if that would normally be the strategy. ! * @return The Ordered object at that index. ! */ ! public Ordered fetch (int index, Ordered reuse); ! ! /** ! * Swaps the elements at the given indicies. ! * @param i One index. ! * @param j The other index. ! */ ! public void swap (int i, int j); ! } --- 1,76 ---- ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! 
// ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by ! // Derrick Oswald ! // ! ! package org.htmlparser.util.sort; ! ! /** ! * Provides a mechanism to abstract the sort process. ! * Classes implementing this interface are collections of Ordered objects ! * that are to be sorted by the Sort class and are ! * not necessarily Vectors or Arrays of Ordered objects. ! * @see Sort ! */ ! public interface Sortable ! { ! /** ! * Returns the first index of the Sortable. ! * @return The index of the first element. ! */ ! public int first (); ! ! /** ! * Returns the last index of the Sortable. ! * @return The index of the last element. ! * If this were an array object this would be (object.length - 1). ! */ ! public int last (); ! ! /** ! * Fetch the object at the given index. ! * @param index The item number to get. ! * @param reuse If this argument is not null, it is an object ! * acquired from a previous fetch that is no longer needed and ! * may be returned as the result if it makes mores sense to alter ! * and return it than to fetch or create a new element. That is, the ! * reuse object is garbage and may be used to avoid allocating a new ! * object if that would normally be the strategy. ! * @return The Ordered object at that index. ! */ ! public Ordered fetch (int index, Ordered reuse); ! ! /** ! * Swaps the elements at the given indicies. ! * @param i One index. ! * @param j The other index. ! */ ! public void swap (int i, int j); ! } |
From: <der...@us...> - 2003-09-03 23:38:03
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1:/tmp/cvs-serv31228/visitors Modified Files: HtmlPage.java LinkFindingVisitor.java NodeVisitor.java ObjectFindingVisitor.java StringFindingVisitor.java TagFindingVisitor.java TextExtractingVisitor.java UrlModifyingVisitor.java Log Message: Change tabs to spaces in all source files. Index: HtmlPage.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/HtmlPage.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** HtmlPage.java 24 Aug 2003 21:59:45 -0000 1.30 --- HtmlPage.java 3 Sep 2003 23:36:22 -0000 1.31 *************** *** 41,121 **** public class HtmlPage extends NodeVisitor { ! private String title; ! private NodeList nodesInBody; ! private NodeList tables; ! private boolean bodyTagBegin; ! ! public HtmlPage(Parser parser) { ! super(false); ! parser.registerScanners(); ! parser.addScanner(new TableScanner(parser)); ! nodesInBody = new NodeList(); ! tables = new NodeList(); ! bodyTagBegin = false; ! } ! ! public String getTitle() { ! return title; ! } ! public void setTitle(String title) { ! this.title = title; ! } ! public void visitTag(Tag tag) { ! addTagToBodyIfApplicable(tag); ! ! if (isTable(tag)) { ! tables.add(tag); ! } ! else { ! if (isBodyTag(tag)) ! bodyTagBegin = true; ! } ! } ! private boolean isTable(Tag tag) { ! return tag instanceof TableTag; ! } ! private void addTagToBodyIfApplicable(Node node) { ! if (bodyTagBegin) ! nodesInBody.add(node); ! } ! public void visitEndTag(EndTag endTag) { ! if (isBodyTag(endTag)) ! bodyTagBegin = false; ! addTagToBodyIfApplicable(endTag); ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! addTagToBodyIfApplicable(remarkNode); ! } ! public void visitStringNode(StringNode stringNode) { ! addTagToBodyIfApplicable(stringNode); ! } ! ! private boolean isBodyTag(Tag tag) { ! 
return tag.getTagName().equals("BODY"); ! } ! ! public NodeList getBody() { ! return nodesInBody; ! } ! ! public TableTag [] getTables() { ! TableTag [] tableArr = new TableTag[tables.size()]; ! for (int i=0;i<tables.size();i++) ! tableArr[i] = (TableTag)tables.elementAt(i); ! return tableArr; ! } ! public void visitTitleTag(TitleTag titleTag) { ! title = titleTag.getTitle(); ! } } --- 41,121 ---- public class HtmlPage extends NodeVisitor { ! private String title; ! private NodeList nodesInBody; ! private NodeList tables; ! private boolean bodyTagBegin; ! ! public HtmlPage(Parser parser) { ! super(false); ! parser.registerScanners(); ! parser.addScanner(new TableScanner(parser)); ! nodesInBody = new NodeList(); ! tables = new NodeList(); ! bodyTagBegin = false; ! } ! ! public String getTitle() { ! return title; ! } ! public void setTitle(String title) { ! this.title = title; ! } ! public void visitTag(Tag tag) { ! addTagToBodyIfApplicable(tag); ! ! if (isTable(tag)) { ! tables.add(tag); ! } ! else { ! if (isBodyTag(tag)) ! bodyTagBegin = true; ! } ! } ! private boolean isTable(Tag tag) { ! return tag instanceof TableTag; ! } ! private void addTagToBodyIfApplicable(Node node) { ! if (bodyTagBegin) ! nodesInBody.add(node); ! } ! public void visitEndTag(EndTag endTag) { ! if (isBodyTag(endTag)) ! bodyTagBegin = false; ! addTagToBodyIfApplicable(endTag); ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! addTagToBodyIfApplicable(remarkNode); ! } ! public void visitStringNode(StringNode stringNode) { ! addTagToBodyIfApplicable(stringNode); ! } ! ! private boolean isBodyTag(Tag tag) { ! return tag.getTagName().equals("BODY"); ! } ! ! public NodeList getBody() { ! return nodesInBody; ! } ! ! public TableTag [] getTables() { ! TableTag [] tableArr = new TableTag[tables.size()]; ! for (int i=0;i<tables.size();i++) ! tableArr[i] = (TableTag)tables.elementAt(i); ! return tableArr; ! } ! public void visitTitleTag(TitleTag titleTag) { ! title = titleTag.getTitle(); ! 
} } Index: LinkFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/LinkFindingVisitor.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** LinkFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.25 --- LinkFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 32,58 **** public class LinkFindingVisitor extends NodeVisitor { ! private String linkTextToFind; ! private boolean linkTagFound = false; ! private int count = 0; ! ! public LinkFindingVisitor(String linkTextToFind) { ! this.linkTextToFind = linkTextToFind.toUpperCase(); ! } ! public void visitLinkTag(LinkTag linkTag) { ! System.out.println("Matching with "+linkTag.getLinkText()); ! if (linkTag.getLinkText().toUpperCase().indexOf(linkTextToFind)!=-1) { ! linkTagFound = true; ! count++; ! } ! } ! ! public boolean linkTextFound() { ! return linkTagFound; ! } ! ! public int getCount() { ! return count; ! } } --- 32,58 ---- public class LinkFindingVisitor extends NodeVisitor { ! private String linkTextToFind; ! private boolean linkTagFound = false; ! private int count = 0; ! ! public LinkFindingVisitor(String linkTextToFind) { ! this.linkTextToFind = linkTextToFind.toUpperCase(); ! } ! public void visitLinkTag(LinkTag linkTag) { ! System.out.println("Matching with "+linkTag.getLinkText()); ! if (linkTag.getLinkText().toUpperCase().indexOf(linkTextToFind)!=-1) { ! linkTagFound = true; ! count++; ! } ! } ! ! public boolean linkTextFound() { ! return linkTagFound; ! } ! ! public int getCount() { ! return count; ! 
} } Index: NodeVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** NodeVisitor.java 24 Aug 2003 21:59:45 -0000 1.25 --- NodeVisitor.java 3 Sep 2003 23:36:22 -0000 1.26 *************** *** 38,95 **** public abstract class NodeVisitor { ! private boolean recurseChildren; ! private boolean recurseSelf; ! ! public NodeVisitor() { ! this(true); ! } ! ! public NodeVisitor(boolean recurseChildren) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = true; ! } ! ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = recurseSelf; ! } ! public void visitTag(Tag tag) { ! ! } ! public void visitStringNode(StringNode stringNode) { ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! } ! ! public void visitImageTag(ImageTag imageTag) { ! } ! ! public void visitEndTag(EndTag endTag) { ! ! } ! ! public void visitTitleTag(TitleTag titleTag) { ! ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! ! } ! ! public boolean shouldRecurseChildren() { ! return recurseChildren; ! } ! ! public boolean shouldRecurseSelf() { ! return recurseSelf; ! } ! /** ! * Override this method if you wish to do special ! * processing upon completion of parsing ! */ ! public void finishedParsing() { ! } } --- 38,95 ---- public abstract class NodeVisitor { ! private boolean recurseChildren; ! private boolean recurseSelf; ! ! public NodeVisitor() { ! this(true); ! } ! ! public NodeVisitor(boolean recurseChildren) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = true; ! } ! ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { ! this.recurseChildren = recurseChildren; ! this.recurseSelf = recurseSelf; ! } ! public void visitTag(Tag tag) { ! ! } ! 
public void visitStringNode(StringNode stringNode) { ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! } ! ! public void visitImageTag(ImageTag imageTag) { ! } ! ! public void visitEndTag(EndTag endTag) { ! ! } ! ! public void visitTitleTag(TitleTag titleTag) { ! ! } ! public void visitRemarkNode(RemarkNode remarkNode) { ! ! } ! ! public boolean shouldRecurseChildren() { ! return recurseChildren; ! } ! ! public boolean shouldRecurseSelf() { ! return recurseSelf; ! } ! /** ! * Override this method if you wish to do special ! * processing upon completion of parsing ! */ ! public void finishedParsing() { ! } } Index: ObjectFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/ObjectFindingVisitor.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** ObjectFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.30 --- ObjectFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.31 *************** *** 36,66 **** public class ObjectFindingVisitor extends NodeVisitor { ! private Class classTypeToFind; ! private int count = 0; ! private NodeList tags; ! ! public ObjectFindingVisitor(Class classTypeToFind) { ! this(classTypeToFind,false); ! } ! ! public ObjectFindingVisitor(Class classTypeToFind,boolean recurse) { ! super(recurse); ! this.classTypeToFind = classTypeToFind; ! this.tags = new NodeList(); ! } ! ! public int getCount() { ! return count; ! } ! public void visitTag(Tag tag) { ! if (tag.getClass().getName().equals(classTypeToFind.getName())) { ! count++; ! tags.add(tag); ! } ! } ! public Node[] getTags() { ! return tags.toNodeArray(); ! } } --- 36,66 ---- public class ObjectFindingVisitor extends NodeVisitor { ! private Class classTypeToFind; ! private int count = 0; ! private NodeList tags; ! ! public ObjectFindingVisitor(Class classTypeToFind) { ! this(classTypeToFind,false); ! } ! ! 
public ObjectFindingVisitor(Class classTypeToFind,boolean recurse) { ! super(recurse); ! this.classTypeToFind = classTypeToFind; ! this.tags = new NodeList(); ! } ! ! public int getCount() { ! return count; ! } ! public void visitTag(Tag tag) { ! if (tag.getClass().getName().equals(classTypeToFind.getName())) { ! count++; ! tags.add(tag); ! } ! } ! public Node[] getTags() { ! return tags.toNodeArray(); ! } } Index: StringFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/StringFindingVisitor.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** StringFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.30 --- StringFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.31 *************** *** 34,75 **** public class StringFindingVisitor extends NodeVisitor { ! private boolean stringFound = false; ! private String stringToFind; ! private int foundCount; ! private boolean multipleSearchesWithinStrings; ! ! public StringFindingVisitor(String stringToFind) { ! this.stringToFind = stringToFind.toUpperCase(); ! foundCount = 0; ! multipleSearchesWithinStrings = false; ! } ! ! public void doMultipleSearchesWithinStrings() { ! multipleSearchesWithinStrings = true; ! } ! ! public void visitStringNode(StringNode stringNode) { ! String stringToBeSearched = stringNode.getText().toUpperCase(); ! if (!multipleSearchesWithinStrings && ! stringToBeSearched.indexOf(stringToFind) != -1) { ! stringFound = true; ! foundCount++; ! } else if (multipleSearchesWithinStrings) { ! int index = -1; ! do { ! index = stringToBeSearched.indexOf(stringToFind, index+1); ! if (index!=-1) ! foundCount++; ! } while (index != -1); ! } ! } ! ! public boolean stringWasFound() { ! return stringFound; ! } ! ! public int stringFoundCount() { ! return foundCount; ! } } --- 34,75 ---- public class StringFindingVisitor extends NodeVisitor { ! private boolean stringFound = false; ! 
private String stringToFind; ! private int foundCount; ! private boolean multipleSearchesWithinStrings; ! ! public StringFindingVisitor(String stringToFind) { ! this.stringToFind = stringToFind.toUpperCase(); ! foundCount = 0; ! multipleSearchesWithinStrings = false; ! } ! ! public void doMultipleSearchesWithinStrings() { ! multipleSearchesWithinStrings = true; ! } ! ! public void visitStringNode(StringNode stringNode) { ! String stringToBeSearched = stringNode.getText().toUpperCase(); ! if (!multipleSearchesWithinStrings && ! stringToBeSearched.indexOf(stringToFind) != -1) { ! stringFound = true; ! foundCount++; ! } else if (multipleSearchesWithinStrings) { ! int index = -1; ! do { ! index = stringToBeSearched.indexOf(stringToFind, index+1); ! if (index!=-1) ! foundCount++; ! } while (index != -1); ! } ! } ! ! public boolean stringWasFound() { ! return stringFound; ! } ! ! public int stringFoundCount() { ! return foundCount; ! } } Index: TagFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TagFindingVisitor.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** TagFindingVisitor.java 24 Aug 2003 21:59:45 -0000 1.31 --- TagFindingVisitor.java 3 Sep 2003 23:36:22 -0000 1.32 *************** *** 37,95 **** public class TagFindingVisitor extends NodeVisitor { ! private String [] tagsToBeFound; ! private int count []; ! private int endTagCount []; ! private NodeList [] tags; ! private NodeList [] endTags; ! private boolean endTagCheck; ! ! public TagFindingVisitor(String [] tagsToBeFound) { ! this(tagsToBeFound,false); ! } ! public TagFindingVisitor(String [] tagsToBeFound, boolean endTagCheck) { ! this.tagsToBeFound = tagsToBeFound; ! this.tags = new NodeList[tagsToBeFound.length]; ! if (endTagCheck) { ! endTags = new NodeList[tagsToBeFound.length]; ! endTagCount = new int[tagsToBeFound.length]; ! } ! 
for (int i=0;i<tagsToBeFound.length;i++) { ! tags[i] = new NodeList(); ! if (endTagCheck) ! endTags[i] = new NodeList(); ! } ! this.count = new int[tagsToBeFound.length]; ! this.endTagCheck = endTagCheck; ! } ! ! public int getTagCount(int index) { ! return count[index]; ! } ! public void visitTag(Tag tag) { ! for (int i=0;i<tagsToBeFound.length;i++) ! if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! count[i]++; ! tags[i].add(tag); ! } ! } ! public Node [] getTags(int index) { ! return tags[index].toNodeArray(); ! } ! public void visitEndTag(EndTag endTag) { ! if (!endTagCheck) return; ! for (int i=0;i<tagsToBeFound.length;i++) ! if (endTag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! endTagCount[i]++; ! endTags[i].add(endTag); ! } ! } ! ! public int getEndTagCount(int index) { ! return endTagCount[index]; ! } ! } --- 37,95 ---- public class TagFindingVisitor extends NodeVisitor { ! private String [] tagsToBeFound; ! private int count []; ! private int endTagCount []; ! private NodeList [] tags; ! private NodeList [] endTags; ! private boolean endTagCheck; ! ! public TagFindingVisitor(String [] tagsToBeFound) { ! this(tagsToBeFound,false); ! } ! public TagFindingVisitor(String [] tagsToBeFound, boolean endTagCheck) { ! this.tagsToBeFound = tagsToBeFound; ! this.tags = new NodeList[tagsToBeFound.length]; ! if (endTagCheck) { ! endTags = new NodeList[tagsToBeFound.length]; ! endTagCount = new int[tagsToBeFound.length]; ! } ! for (int i=0;i<tagsToBeFound.length;i++) { ! tags[i] = new NodeList(); ! if (endTagCheck) ! endTags[i] = new NodeList(); ! } ! this.count = new int[tagsToBeFound.length]; ! this.endTagCheck = endTagCheck; ! } ! ! public int getTagCount(int index) { ! return count[index]; ! } ! public void visitTag(Tag tag) { ! for (int i=0;i<tagsToBeFound.length;i++) ! if (tag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! count[i]++; ! tags[i].add(tag); ! } ! } ! public Node [] getTags(int index) { ! return tags[index].toNodeArray(); ! 
} ! public void visitEndTag(EndTag endTag) { ! if (!endTagCheck) return; ! for (int i=0;i<tagsToBeFound.length;i++) ! if (endTag.getTagName().equalsIgnoreCase(tagsToBeFound[i])) { ! endTagCount[i]++; ! endTags[i].add(endTag); ! } ! } ! ! public int getEndTagCount(int index) { ! return endTagCount[index]; ! } ! } Index: TextExtractingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TextExtractingVisitor.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** TextExtractingVisitor.java 24 Aug 2003 21:59:45 -0000 1.29 --- TextExtractingVisitor.java 3 Sep 2003 23:36:22 -0000 1.30 *************** *** 47,92 **** */ public class TextExtractingVisitor extends NodeVisitor { ! private StringBuffer textAccumulator; ! private boolean preTagBeingProcessed; ! ! public TextExtractingVisitor() { ! textAccumulator = new StringBuffer(); ! preTagBeingProcessed = false; ! } ! public String getExtractedText() { ! return textAccumulator.toString(); ! } ! public void visitStringNode(StringNode stringNode) { ! String text = stringNode.getText(); ! if (!preTagBeingProcessed) { ! text = Translate.decode(text); ! text = replaceNonBreakingSpaceWithOrdinarySpace(text); ! } ! textAccumulator.append(text); ! } ! public void visitTitleTag(TitleTag titleTag) { ! textAccumulator.append(titleTag.getTitle ()); ! } ! private String replaceNonBreakingSpaceWithOrdinarySpace(String text) { ! return text.replace('\u00a0',' '); ! } ! public void visitEndTag(EndTag endTag) { ! if (isPreTag(endTag)) ! preTagBeingProcessed = false; ! } ! public void visitTag(Tag tag) { ! if (isPreTag(tag)) ! preTagBeingProcessed = true; ! } ! private boolean isPreTag(Tag tag) { ! return tag.getTagName().equals("PRE"); ! } } --- 47,92 ---- */ public class TextExtractingVisitor extends NodeVisitor { ! private StringBuffer textAccumulator; ! private boolean preTagBeingProcessed; ! ! 
public TextExtractingVisitor() { ! textAccumulator = new StringBuffer(); ! preTagBeingProcessed = false; ! } ! public String getExtractedText() { ! return textAccumulator.toString(); ! } ! public void visitStringNode(StringNode stringNode) { ! String text = stringNode.getText(); ! if (!preTagBeingProcessed) { ! text = Translate.decode(text); ! text = replaceNonBreakingSpaceWithOrdinarySpace(text); ! } ! textAccumulator.append(text); ! } ! public void visitTitleTag(TitleTag titleTag) { ! textAccumulator.append(titleTag.getTitle ()); ! } ! private String replaceNonBreakingSpaceWithOrdinarySpace(String text) { ! return text.replace('\u00a0',' '); ! } ! public void visitEndTag(EndTag endTag) { ! if (isPreTag(endTag)) ! preTagBeingProcessed = false; ! } ! public void visitTag(Tag tag) { ! if (isPreTag(tag)) ! preTagBeingProcessed = true; ! } ! private boolean isPreTag(Tag tag) { ! return tag.getTagName().equals("PRE"); ! } } Index: UrlModifyingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/UrlModifyingVisitor.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** UrlModifyingVisitor.java 24 Aug 2003 21:59:45 -0000 1.28 --- UrlModifyingVisitor.java 3 Sep 2003 23:36:22 -0000 1.29 *************** *** 39,83 **** public class UrlModifyingVisitor extends NodeVisitor { ! private String linkPrefix; ! private StringBuffer modifiedResult; ! private Parser parser; ! ! public UrlModifyingVisitor(Parser parser, String linkPrefix) { ! super(true,false); ! this.parser = parser; ! LinkScanner linkScanner = new LinkScanner(); ! parser.addScanner(linkScanner); ! parser.addScanner( ! linkScanner.createImageScanner( ! ImageTag.IMAGE_TAG_FILTER ! ) ! ); ! this.linkPrefix =linkPrefix; ! modifiedResult = new StringBuffer(); ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! linkTag.setLink(linkPrefix + linkTag.getLink()); ! } ! 
public void visitImageTag(ImageTag imageTag) { ! imageTag.setImageURL(linkPrefix + imageTag.getImageURL()); ! modifiedResult.append(imageTag.toHtml()); ! } ! ! public void visitEndTag(EndTag endTag) { ! modifiedResult.append(endTag.toHtml()); ! } ! public void visitStringNode(StringNode stringNode) { ! modifiedResult.append(stringNode.toHtml()); ! } ! public void visitTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); ! } ! ! public String getModifiedResult() { ! return modifiedResult.toString(); ! } } --- 39,83 ---- public class UrlModifyingVisitor extends NodeVisitor { ! private String linkPrefix; ! private StringBuffer modifiedResult; ! private Parser parser; ! ! public UrlModifyingVisitor(Parser parser, String linkPrefix) { ! super(true,false); ! this.parser = parser; ! LinkScanner linkScanner = new LinkScanner(); ! parser.addScanner(linkScanner); ! parser.addScanner( ! linkScanner.createImageScanner( ! ImageTag.IMAGE_TAG_FILTER ! ) ! ); ! this.linkPrefix =linkPrefix; ! modifiedResult = new StringBuffer(); ! } ! ! public void visitLinkTag(LinkTag linkTag) { ! linkTag.setLink(linkPrefix + linkTag.getLink()); ! } ! public void visitImageTag(ImageTag imageTag) { ! imageTag.setImageURL(linkPrefix + imageTag.getImageURL()); ! modifiedResult.append(imageTag.toHtml()); ! } ! ! public void visitEndTag(EndTag endTag) { ! modifiedResult.append(endTag.toHtml()); ! } ! public void visitStringNode(StringNode stringNode) { ! modifiedResult.append(stringNode.toHtml()); ! } ! public void visitTag(Tag tag) { ! modifiedResult.append(tag.toHtml()); ! } ! ! public String getModifiedResult() { ! return modifiedResult.toString(); ! } } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tags Modified Files: BaseHrefTag.java BodyTag.java Bullet.java BulletList.java CompositeTag.java Div.java DoctypeTag.java EndTag.java FormTag.java FrameSetTag.java FrameTag.java HeadTag.java Html.java ImageTag.java InputTag.java JspTag.java LabelTag.java MetaTag.java OptionTag.java ScriptTag.java SelectTag.java Span.java StyleTag.java TableColumn.java TableRow.java TableTag.java Tag.java TextareaTag.java TitleTag.java Log Message: Change tabs to spaces in all source files. Index: BaseHrefTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BaseHrefTag.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** BaseHrefTag.java 24 Aug 2003 21:59:42 -0000 1.22 --- BaseHrefTag.java 3 Sep 2003 23:36:20 -0000 1.23 *************** *** 41,60 **** */ public class BaseHrefTag extends Tag { ! private String baseUrl; ! public BaseHrefTag(TagData tagData, String baseUrl) { ! super(tagData); ! this.baseUrl = baseUrl; ! } ! public String getBaseUrl() { ! return baseUrl; ! } ! public void setBaseUrl(String baseUrl) { ! this.baseUrl = baseUrl; ! } ! public String toString() { ! return "BASE TAG\n"+ ! "--------\n"+ ! "Name : "+baseUrl; ! } } --- 41,60 ---- */ public class BaseHrefTag extends Tag { ! private String baseUrl; ! public BaseHrefTag(TagData tagData, String baseUrl) { ! super(tagData); ! this.baseUrl = baseUrl; ! } ! public String getBaseUrl() { ! return baseUrl; ! } ! public void setBaseUrl(String baseUrl) { ! this.baseUrl = baseUrl; ! } ! public String toString() { ! return "BASE TAG\n"+ ! "--------\n"+ ! "Name : "+baseUrl; ! 
} } Index: BodyTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BodyTag.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BodyTag.java 24 Aug 2003 21:59:42 -0000 1.11 --- BodyTag.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 38,51 **** public class BodyTag extends CompositeTag { ! public BodyTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData,compositeTagData); ! } ! ! public String getBody() { ! return toPlainTextString(); ! } ! ! public String toString() { ! return "BODY: "+getBody(); ! } } --- 38,51 ---- public class BodyTag extends CompositeTag { ! public BodyTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData,compositeTagData); ! } ! ! public String getBody() { ! return toPlainTextString(); ! } ! ! public String toString() { ! return "BODY: "+getBody(); ! } } Index: Bullet.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Bullet.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Bullet.java 24 Aug 2003 21:59:42 -0000 1.11 --- Bullet.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 35,41 **** public class Bullet extends CompositeTag { ! public Bullet(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } } --- 35,41 ---- public class Bullet extends CompositeTag { ! public Bullet(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! 
} } Index: BulletList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BulletList.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BulletList.java 24 Aug 2003 21:59:42 -0000 1.11 --- BulletList.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 40,51 **** public class BulletList extends CompositeTag { ! public BulletList(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! public String toString() { ! // TODO Auto-generated method stub ! return "BulletList"; ! } } --- 40,51 ---- public class BulletList extends CompositeTag { ! public BulletList(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! public String toString() { ! // TODO Auto-generated method stub ! return "BulletList"; ! } } Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** CompositeTag.java 24 Aug 2003 21:59:42 -0000 1.51 --- CompositeTag.java 3 Sep 2003 23:36:20 -0000 1.52 *************** *** 38,49 **** public abstract class CompositeTag extends Tag { ! protected Tag startTag, endTag; ! public CompositeTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData); ! this.startTag = compositeTagData.getStartTag(); ! this.endTag = compositeTagData.getEndTag(); ! setChildren (compositeTagData.getChildren()); ! } /** --- 38,49 ---- public abstract class CompositeTag extends Tag { ! protected Tag startTag, endTag; ! public CompositeTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData); ! this.startTag = compositeTagData.getStartTag(); ! this.endTag = compositeTagData.getEndTag(); ! setChildren (compositeTagData.getChildren()); ! 
} /** *************** *** 51,58 **** * @return Am iterator over the children of this node. */ ! public SimpleNodeIterator children () { ! return (getChildren ().elements ()); ! } /** --- 51,58 ---- * @return Am iterator over the children of this node. */ ! public SimpleNodeIterator children () { ! return (getChildren ().elements ()); ! } /** *************** *** 61,68 **** * @return The child at that index. */ ! public Node getChild (int index) { ! return (getChildren ().elementAt (index)); ! } /** --- 61,68 ---- * @return The child at that index. */ ! public Node getChild (int index) { ! return (getChildren ().elementAt (index)); ! } /** *************** *** 70,86 **** * @return The children in an array. */ ! public Node [] getChildrenAsNodeArray () { ! return (getChildren ().toNodeArray ()); ! } ! /** * Remove the child at the position given. * @param i The index of the child to remove. */ ! public void removeChild (int i) { ! getChildren ().remove (i); ! } /** --- 70,86 ---- * @return The children in an array. */ ! public Node [] getChildrenAsNodeArray () { ! return (getChildren ().toNodeArray ()); ! } ! /** * Remove the child at the position given. * @param i The index of the child to remove. */ ! public void removeChild (int i) { ! getChildren ().remove (i); ! } /** *************** *** 89,93 **** * @return An iterator over the children. */ ! public SimpleNodeIterator elements() { return (getChildren ().elements ()); --- 89,93 ---- * @return An iterator over the children. */ ! public SimpleNodeIterator elements() { return (getChildren ().elements ()); *************** *** 95,354 **** public String toPlainTextString() { ! StringBuffer stringRepresentation = new StringBuffer(); ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! stringRepresentation.append(e.nextNode().toPlainTextString()); ! } ! return stringRepresentation.toString(); ! } ! public void putStartTagInto(StringBuffer sb) { ! sb.append(startTag.toHtml()); ! } ! 
protected void putChildrenInto(StringBuffer sb) { ! Node node,prevNode=startTag; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! node = e.nextNode(); ! if (prevNode!=null) { ! if (prevNode.elementEnd()>node.elementBegin()) { ! // Its a new line ! sb.append(Parser.getLineSeparator()); ! } ! } ! sb.append(node.toHtml()); ! prevNode=node; ! } ! if (prevNode.elementEnd()>endTag.elementBegin()) { ! sb.append(Parser.getLineSeparator()); ! } ! } ! protected void putEndTagInto(StringBuffer sb) { ! sb.append(endTag.toHtml()); ! } ! public String toHtml() { ! StringBuffer sb = new StringBuffer(); ! putStartTagInto(sb); ! if (!startTag.isEmptyXmlTag()) { ! putChildrenInto(sb); ! putEndTagInto(sb); ! } ! return sb.toString(); ! } ! /** ! * Searches all children who for a name attribute. Returns first match. ! * @param name Attribute to match in tag ! * @return Tag Tag matching the name attribute ! */ ! public Tag searchByName(String name) { ! Node node; ! Tag tag=null; ! boolean found = false; ! for (SimpleNodeIterator e = children();e.hasMoreNodes() && !found;) { ! node = (Node)e.nextNode(); ! if (node instanceof Tag) { ! tag = (Tag)node; ! String nameAttribute = tag.getAttribute("NAME"); ! if (nameAttribute!=null && nameAttribute.equals(name)) found=true; ! } ! } ! if (found) ! return tag; ! else ! return null; ! } ! /** ! * Searches for any node whose text representation contains the search ! * string. Collects all such nodes in a NodeList. ! * e.g. if you wish to find any textareas in a form tag containing "hello ! * world", the code would be : ! * <code> ! * NodeList nodeList = formTag.searchFor("Hello World"); ! * </code> ! * @param searchString search criterion ! * @param caseSensitive specify whether this search should be case ! * sensitive ! * @return NodeList Collection of nodes whose string contents or ! * representation have the searchString in them ! */ ! public NodeList searchFor(String searchString, boolean caseSensitive) { ! 
NodeList foundList = new NodeList(); ! Node node; ! if (!caseSensitive) searchString = searchString.toUpperCase(); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! node = e.nextNode(); ! String nodeTextString = node.toPlainTextString(); ! if (!caseSensitive) nodeTextString=nodeTextString.toUpperCase(); ! if (nodeTextString.indexOf(searchString)!=-1) { ! foundList.add(node); ! } ! } ! return foundList; ! } ! /** ! * Collect all objects that are of a certain type ! * Note that this will not check for parent types, and will not ! * recurse through child tags ! * @param classType ! * @return NodeList ! */ ! public NodeList searchFor(Class classType) { ! return (getChildren ().searchFor (classType)); ! } ! /** ! * Searches for any node whose text representation contains the search ! * string. Collects all such nodes in a NodeList. ! * e.g. if you wish to find any textareas in a form tag containing "hello ! * world", the code would be : ! * <code> ! * NodeList nodeList = formTag.searchFor("Hello World"); ! * </code> ! * This search is <b>case-insensitive</b>. ! * @param searchString search criterion ! * @return NodeList Collection of nodes whose string contents or ! * representation have the searchString in them ! */ ! public NodeList searchFor(String searchString) { ! return searchFor(searchString, false); ! } ! /** ! * Returns the node number of the string node containing the ! * given text. This can be useful to index into the composite tag ! * and get other children. ! * @param text ! * @return int ! */ ! public int findPositionOf(String text) { ! Node node; ! int loc = 0; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! node = e.nextNode(); ! if (node.toPlainTextString().toUpperCase().indexOf(text.toUpperCase())!=-1) { ! return loc; ! } ! loc++; ! } ! return -1; ! } ! ! /** ! * Returns the node number of a child node given the node object. ! * This would typically be used in conjuction with digUpStringNode, ! 
* after which the string node's parent can be used to find the ! * string node's position. Faster than calling findPositionOf(text) ! * again. Note that the position is at a linear level alone - there ! * is no recursion in this method. ! * @param searchNode The child node to find. ! * @return The offset of the child tag or -1 if it was not found. ! */ ! public int findPositionOf(Node searchNode) { ! Node node; ! int loc = 0; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! node = e.nextNode(); ! if (node==searchNode) { ! return loc; ! } ! loc++; ! } ! return -1; ! } ! ! /** ! * Get child at given index ! * @param index ! * @return Node ! */ ! public Node childAt(int index) { ! return (getChildren ().elementAt (index)); ! } ! ! public void collectInto(NodeList collectionList, String filter) { ! super.collectInto(collectionList, filter); ! Node node; ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! node = e.nextNode(); ! node.collectInto(collectionList,filter); ! } ! } ! public void collectInto(NodeList collectionList, Class nodeType) { ! super.collectInto(collectionList,nodeType); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! e.nextNode().collectInto(collectionList,nodeType); ! } ! } ! ! public String getChildrenHTML() { ! StringBuffer buff = new StringBuffer(); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! AbstractNode node = (AbstractNode)e.nextNode(); ! buff.append(node.toHtml()); ! } ! return buff.toString(); ! } ! ! public void accept(NodeVisitor visitor) { ! if (visitor.shouldRecurseChildren()) { ! startTag.accept(visitor); ! SimpleNodeIterator children = children(); ! while (children.hasMoreNodes()) { ! Node child = (Node)children.nextNode(); ! child.accept(visitor); ! } ! endTag.accept(visitor); ! } ! if (visitor.shouldRecurseSelf()) ! visitor.visitTag(this); ! } ! public int getChildCount() { ! return (getChildren ().size ()); ! } ! public Tag getStartTag() { ! return startTag; ! } ! 
public Tag getEndTag() { ! return endTag; ! } ! /** ! * Finds a string node, however embedded it might be, and returns ! * it. The string node will retain links to its parents, so ! * further navigation is possible. ! * @param searchText ! * @return The list of string nodes (recursively) found. ! */ ! public StringNode [] digupStringNode(String searchText) { ! NodeList nodeList = searchFor(searchText); ! NodeList stringNodes = new NodeList(); ! for (int i=0;i<nodeList.size();i++) { ! Node node = nodeList.elementAt(i); ! if (node instanceof StringNode) { ! stringNodes.add(node); ! } else { ! if (node instanceof CompositeTag) { ! CompositeTag ctag = (CompositeTag)node; ! StringNode [] nodes = ctag.digupStringNode(searchText); ! for (int j=0;j<nodes.length;j++) ! stringNodes.add(nodes[j]); ! } ! } ! } ! StringNode [] stringNode = new StringNode[stringNodes.size()]; ! for (int i=0;i<stringNode.length;i++) { ! stringNode[i] = (StringNode)stringNodes.elementAt(i); ! } ! return stringNode; ! } --- 95,354 ---- public String toPlainTextString() { ! StringBuffer stringRepresentation = new StringBuffer(); ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! stringRepresentation.append(e.nextNode().toPlainTextString()); ! } ! return stringRepresentation.toString(); ! } ! public void putStartTagInto(StringBuffer sb) { ! sb.append(startTag.toHtml()); ! } ! protected void putChildrenInto(StringBuffer sb) { ! Node node,prevNode=startTag; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! node = e.nextNode(); ! if (prevNode!=null) { ! if (prevNode.elementEnd()>node.elementBegin()) { ! // Its a new line ! sb.append(Parser.getLineSeparator()); ! } ! } ! sb.append(node.toHtml()); ! prevNode=node; ! } ! if (prevNode.elementEnd()>endTag.elementBegin()) { ! sb.append(Parser.getLineSeparator()); ! } ! } ! protected void putEndTagInto(StringBuffer sb) { ! sb.append(endTag.toHtml()); ! } ! public String toHtml() { ! StringBuffer sb = new StringBuffer(); ! 
putStartTagInto(sb); ! if (!startTag.isEmptyXmlTag()) { ! putChildrenInto(sb); ! putEndTagInto(sb); ! } ! return sb.toString(); ! } ! /** ! * Searches all children who for a name attribute. Returns first match. ! * @param name Attribute to match in tag ! * @return Tag Tag matching the name attribute ! */ ! public Tag searchByName(String name) { ! Node node; ! Tag tag=null; ! boolean found = false; ! for (SimpleNodeIterator e = children();e.hasMoreNodes() && !found;) { ! node = (Node)e.nextNode(); ! if (node instanceof Tag) { ! tag = (Tag)node; ! String nameAttribute = tag.getAttribute("NAME"); ! if (nameAttribute!=null && nameAttribute.equals(name)) found=true; ! } ! } ! if (found) ! return tag; ! else ! return null; ! } ! /** ! * Searches for any node whose text representation contains the search ! * string. Collects all such nodes in a NodeList. ! * e.g. if you wish to find any textareas in a form tag containing "hello ! * world", the code would be : ! * <code> ! * NodeList nodeList = formTag.searchFor("Hello World"); ! * </code> ! * @param searchString search criterion ! * @param caseSensitive specify whether this search should be case ! * sensitive ! * @return NodeList Collection of nodes whose string contents or ! * representation have the searchString in them ! */ ! public NodeList searchFor(String searchString, boolean caseSensitive) { ! NodeList foundList = new NodeList(); ! Node node; ! if (!caseSensitive) searchString = searchString.toUpperCase(); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! node = e.nextNode(); ! String nodeTextString = node.toPlainTextString(); ! if (!caseSensitive) nodeTextString=nodeTextString.toUpperCase(); ! if (nodeTextString.indexOf(searchString)!=-1) { ! foundList.add(node); ! } ! } ! return foundList; ! } ! /** ! * Collect all objects that are of a certain type ! * Note that this will not check for parent types, and will not ! * recurse through child tags ! * @param classType ! * @return NodeList ! */ ! 
public NodeList searchFor(Class classType) { ! return (getChildren ().searchFor (classType)); ! } ! /** ! * Searches for any node whose text representation contains the search ! * string. Collects all such nodes in a NodeList. ! * e.g. if you wish to find any textareas in a form tag containing "hello ! * world", the code would be : ! * <code> ! * NodeList nodeList = formTag.searchFor("Hello World"); ! * </code> ! * This search is <b>case-insensitive</b>. ! * @param searchString search criterion ! * @return NodeList Collection of nodes whose string contents or ! * representation have the searchString in them ! */ ! public NodeList searchFor(String searchString) { ! return searchFor(searchString, false); ! } ! /** ! * Returns the node number of the string node containing the ! * given text. This can be useful to index into the composite tag ! * and get other children. ! * @param text ! * @return int ! */ ! public int findPositionOf(String text) { ! Node node; ! int loc = 0; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! node = e.nextNode(); ! if (node.toPlainTextString().toUpperCase().indexOf(text.toUpperCase())!=-1) { ! return loc; ! } ! loc++; ! } ! return -1; ! } ! ! /** ! * Returns the node number of a child node given the node object. ! * This would typically be used in conjuction with digUpStringNode, ! * after which the string node's parent can be used to find the ! * string node's position. Faster than calling findPositionOf(text) ! * again. Note that the position is at a linear level alone - there ! * is no recursion in this method. ! * @param searchNode The child node to find. ! * @return The offset of the child tag or -1 if it was not found. ! */ ! public int findPositionOf(Node searchNode) { ! Node node; ! int loc = 0; ! for (SimpleNodeIterator e=children();e.hasMoreNodes();) { ! node = e.nextNode(); ! if (node==searchNode) { ! return loc; ! } ! loc++; ! } ! return -1; ! } ! ! /** ! * Get child at given index ! * @param index ! 
* @return Node ! */ ! public Node childAt(int index) { ! return (getChildren ().elementAt (index)); ! } ! ! public void collectInto(NodeList collectionList, String filter) { ! super.collectInto(collectionList, filter); ! Node node; ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! node = e.nextNode(); ! node.collectInto(collectionList,filter); ! } ! } ! public void collectInto(NodeList collectionList, Class nodeType) { ! super.collectInto(collectionList,nodeType); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! e.nextNode().collectInto(collectionList,nodeType); ! } ! } ! ! public String getChildrenHTML() { ! StringBuffer buff = new StringBuffer(); ! for (SimpleNodeIterator e = children();e.hasMoreNodes();) { ! AbstractNode node = (AbstractNode)e.nextNode(); ! buff.append(node.toHtml()); ! } ! return buff.toString(); ! } ! ! public void accept(NodeVisitor visitor) { ! if (visitor.shouldRecurseChildren()) { ! startTag.accept(visitor); ! SimpleNodeIterator children = children(); ! while (children.hasMoreNodes()) { ! Node child = (Node)children.nextNode(); ! child.accept(visitor); ! } ! endTag.accept(visitor); ! } ! if (visitor.shouldRecurseSelf()) ! visitor.visitTag(this); ! } ! public int getChildCount() { ! return (getChildren ().size ()); ! } ! public Tag getStartTag() { ! return startTag; ! } ! public Tag getEndTag() { ! return endTag; ! } ! /** ! * Finds a string node, however embedded it might be, and returns ! * it. The string node will retain links to its parents, so ! * further navigation is possible. ! * @param searchText ! * @return The list of string nodes (recursively) found. ! */ ! public StringNode [] digupStringNode(String searchText) { ! NodeList nodeList = searchFor(searchText); ! NodeList stringNodes = new NodeList(); ! for (int i=0;i<nodeList.size();i++) { ! Node node = nodeList.elementAt(i); ! if (node instanceof StringNode) { ! stringNodes.add(node); ! } else { ! if (node instanceof CompositeTag) { ! 
CompositeTag ctag = (CompositeTag)node; ! StringNode [] nodes = ctag.digupStringNode(searchText); ! for (int j=0;j<nodes.length;j++) ! stringNodes.add(nodes[j]); ! } ! } ! } ! StringNode [] stringNode = new StringNode[stringNodes.size()]; ! for (int i=0;i<stringNode.length;i++) { ! stringNode[i] = (StringNode)stringNodes.elementAt(i); ! } ! return stringNode; ! } Index: Div.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Div.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** Div.java 24 Aug 2003 21:59:42 -0000 1.11 --- Div.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 34,40 **** public class Div extends CompositeTag { ! public Div(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } } --- 34,40 ---- public class Div extends CompositeTag { ! public Div(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } } Index: DoctypeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/DoctypeTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** DoctypeTag.java 24 Aug 2003 21:59:42 -0000 1.24 --- DoctypeTag.java 3 Sep 2003 23:36:20 -0000 1.25 *************** *** 38,58 **** { /** ! * The HTMLDoctypeTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param tagData The data for this tag. ! */ ! public DoctypeTag(TagData tagData) ! { ! super(tagData); ! } ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { ! return "Doctype Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! public String toHtml() { ! return "<!DOCTYPE "+tagContents+">"; ! } } --- 38,58 ---- { /** ! 
* The HTMLDoctypeTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param tagData The data for this tag. ! */ ! public DoctypeTag(TagData tagData) ! { ! super(tagData); ! } ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { ! return "Doctype Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! public String toHtml() { ! return "<!DOCTYPE "+tagContents+">"; ! } } Index: EndTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/EndTag.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** EndTag.java 24 Aug 2003 21:59:42 -0000 1.27 --- EndTag.java 3 Sep 2003 23:36:20 -0000 1.28 *************** *** 37,133 **** public class EndTag extends Tag { ! public final static String TYPE="END_TAG"; ! public final static int ENDTAG_BEFORE_PARSING_STATE=0; public final static int ENDTAG_WAIT_FOR_SLASH_STATE=1; public final static int ENDTAG_BEGIN_PARSING_STATE=2; ! public final static int ENDTAG_FINISHED_PARSING_STATE=3; ! ! /** ! * Constructor takes 3 arguments to construct an EndTag object. ! * @param tagData The data for this tag. ! */ ! public EndTag(TagData tagData) ! { ! super(tagData); ! } ! /** ! * Locate the end tag withing the input string, by parsing from the given position ! * @param input Input String ! * @param position Position to start parsing from ! */ ! public static AbstractNode find(String input,int position) ! { ! int state = ENDTAG_BEFORE_PARSING_STATE; ! StringBuffer tagContents = new StringBuffer(); ! int tagBegin=0; ! int tagEnd=0; ! int inputLen = input.length(); ! char ch; ! int i ; ! for (i=position;(i<inputLen&& state!=ENDTAG_FINISHED_PARSING_STATE);i++) ! { ! ch = input.charAt(i); ! if (ch=='>' && state==ENDTAG_BEGIN_PARSING_STATE) ! { ! state = ENDTAG_FINISHED_PARSING_STATE; ! tagEnd = i; ! } ! 
if (state==ENDTAG_BEGIN_PARSING_STATE) ! { ! tagContents.append(ch); ! } ! if (state==ENDTAG_WAIT_FOR_SLASH_STATE) ! { ! if (ch=='/') ! { ! state = ENDTAG_BEGIN_PARSING_STATE; ! } ! else return null; ! } ! if (ch=='<') ! { ! if (state==ENDTAG_BEFORE_PARSING_STATE) ! { ! // Transition from State 0 to State 1 - Record data till > is encountered ! tagBegin = i; ! state = ENDTAG_WAIT_FOR_SLASH_STATE; ! } ! else if (state==ENDTAG_BEGIN_PARSING_STATE) ! { ! state=ENDTAG_FINISHED_PARSING_STATE; ! tagEnd=i; ! } ! } else if (state == ENDTAG_BEFORE_PARSING_STATE) // text before the end tag return (null); ! } ! // If parsing did not complete, it might be possible to accept ! if (state==ENDTAG_BEGIN_PARSING_STATE) { ! tagEnd=i; ! state=ENDTAG_FINISHED_PARSING_STATE; ! } ! if (state==ENDTAG_FINISHED_PARSING_STATE) ! return new EndTag(new TagData(tagBegin,tagEnd,tagContents.toString(),input)); ! else return null; ! } ! public String toPlainTextString() { ! return ""; ! } ! public String toHtml() { ! return "</"+getTagName()+">"; ! } ! public String toString() { ! return "EndTag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! ! public void accept(NodeVisitor visitor) { ! visitor.visitEndTag(this); ! } ! public String getType() { ! return TYPE; ! } } --- 37,133 ---- public class EndTag extends Tag { ! public final static String TYPE="END_TAG"; ! public final static int ENDTAG_BEFORE_PARSING_STATE=0; public final static int ENDTAG_WAIT_FOR_SLASH_STATE=1; public final static int ENDTAG_BEGIN_PARSING_STATE=2; ! public final static int ENDTAG_FINISHED_PARSING_STATE=3; ! ! /** ! * Constructor takes 3 arguments to construct an EndTag object. ! * @param tagData The data for this tag. ! */ ! public EndTag(TagData tagData) ! { ! super(tagData); ! } ! /** ! * Locate the end tag withing the input string, by parsing from the given position ! * @param input Input String ! * @param position Position to start parsing from ! */ ! 
public static AbstractNode find(String input,int position) ! { ! int state = ENDTAG_BEFORE_PARSING_STATE; ! StringBuffer tagContents = new StringBuffer(); ! int tagBegin=0; ! int tagEnd=0; ! int inputLen = input.length(); ! char ch; ! int i ; ! for (i=position;(i<inputLen&& state!=ENDTAG_FINISHED_PARSING_STATE);i++) ! { ! ch = input.charAt(i); ! if (ch=='>' && state==ENDTAG_BEGIN_PARSING_STATE) ! { ! state = ENDTAG_FINISHED_PARSING_STATE; ! tagEnd = i; ! } ! if (state==ENDTAG_BEGIN_PARSING_STATE) ! { ! tagContents.append(ch); ! } ! if (state==ENDTAG_WAIT_FOR_SLASH_STATE) ! { ! if (ch=='/') ! { ! state = ENDTAG_BEGIN_PARSING_STATE; ! } ! else return null; ! } ! if (ch=='<') ! { ! if (state==ENDTAG_BEFORE_PARSING_STATE) ! { ! // Transition from State 0 to State 1 - Record data till > is encountered ! tagBegin = i; ! state = ENDTAG_WAIT_FOR_SLASH_STATE; ! } ! else if (state==ENDTAG_BEGIN_PARSING_STATE) ! { ! state=ENDTAG_FINISHED_PARSING_STATE; ! tagEnd=i; ! } ! } else if (state == ENDTAG_BEFORE_PARSING_STATE) // text before the end tag return (null); ! } ! // If parsing did not complete, it might be possible to accept ! if (state==ENDTAG_BEGIN_PARSING_STATE) { ! tagEnd=i; ! state=ENDTAG_FINISHED_PARSING_STATE; ! } ! if (state==ENDTAG_FINISHED_PARSING_STATE) ! return new EndTag(new TagData(tagBegin,tagEnd,tagContents.toString(),input)); ! else return null; ! } ! public String toPlainTextString() { ! return ""; ! } ! public String toHtml() { ! return "</"+getTagName()+">"; ! } ! public String toString() { ! return "EndTag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! ! public void accept(NodeVisitor visitor) { ! visitor.visitEndTag(this); ! } ! public String getType() { ! return TYPE; ! 
} } Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** FormTag.java 24 Aug 2003 21:59:42 -0000 1.30 --- FormTag.java 3 Sep 2003 23:36:20 -0000 1.31 *************** *** 48,175 **** public class FormTag extends CompositeTag { ! public static final String POST="POST"; ! public static final String GET="GET"; ! protected String formURL; ! protected String formName; ! protected String formMethod; ! protected NodeList formInputList; ! private NodeList textAreaList; ! ! /** ! * Constructor takes in tagData, compositeTagData, formTagData ! * @param tagData ! * @param compositeTagData ! */ ! public FormTag(TagData tagData, CompositeTagData compositeTagData) ! { ! super(tagData,compositeTagData); ! ! this.formURL = compositeTagData.getStartTag().getAttribute("ACTION"); ! this.formName = compositeTagData.getStartTag().getAttribute("NAME"); ! this.formMethod = compositeTagData.getStartTag().getAttribute("METHOD"); ! this.formInputList = compositeTagData.getChildren().searchFor(InputTag.class, true); ! this.textAreaList = compositeTagData.getChildren().searchFor(TextareaTag.class, true); ! } ! ! /** * Get the list of input fields. ! * @return Input elements in the form. ! */ ! public NodeList getFormInputs() { ! return formInputList; ! } ! ! /** * Get the list of text areas. ! * @return Textarea elements in the form ! */ ! public NodeList getFormTextareas() { ! return textAreaList; ! } ! ! /** ! * @return String The url of the form ! */ ! public String getFormLocation() ! { ! return formURL; ! } ! ! /** ! * Returns the method of the form ! * @return String The method of the form (GET if nothing is specified) ! */ ! public String getFormMethod() { ! if(formMethod==null) ! { ! formMethod = "GET"; ! } ! return formMethod; ! } ! ! /** ! 
* Get the input tag in the form corresponding to the given name ! * @param name The name of the input tag to be retrieved ! * @return Tag The input tag corresponding to the name provided ! */ ! public InputTag getInputTag(String name) { ! InputTag inputTag=null; ! boolean found=false; ! for (SimpleNodeIterator e = formInputList.elements();e.hasMoreNodes() && !found;) { ! inputTag = (InputTag)e.nextNode(); ! String inputTagName = inputTag.getAttribute("NAME"); ! if (inputTagName!=null && inputTagName.equalsIgnoreCase(name)) { ! found=true; ! } ! } ! if (found) ! return inputTag; else return null; ! } ! ! /** ! * @return String The name of the form ! */ ! public String getFormName() { ! return formName; ! } ! ! /** ! * Set the form location. Modification of this element will cause the HTML rendering ! * to change as well (in a call to toHTML()). ! * @param formURL The new FORM location ! */ ! public void setFormLocation(String formURL) { setAttribute ("ACTION", formURL); ! this.formURL = formURL; ! } ! /** ! * @return String The contents of the FormTag ! */ ! public String toString() { ! return "FORM TAG : Form at "+formURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! /** ! * Find the textarea tag matching the given name ! * @param name Name of the textarea tag to be found within the form ! */ ! public TextareaTag getTextAreaTag(String name) { ! TextareaTag textareaTag=null; ! boolean found = false; ! for (SimpleNodeIterator e=textAreaList.elements();e.hasMoreNodes() && !found;) { ! textareaTag = (TextareaTag)e.nextNode(); ! String textAreaName = textareaTag.getAttribute("NAME"); ! if (textAreaName!=null && textAreaName.equals(name)) { ! found = true; ! } ! } ! if (found) ! return textareaTag; ! else ! return null; ! } ! } --- 48,175 ---- public class FormTag extends CompositeTag { ! public static final String POST="POST"; ! public static final String GET="GET"; ! protected String formURL; ! protected String formName; ! 
protected String formMethod; ! protected NodeList formInputList; ! private NodeList textAreaList; ! ! /** ! * Constructor takes in tagData, compositeTagData, formTagData ! * @param tagData ! * @param compositeTagData ! */ ! public FormTag(TagData tagData, CompositeTagData compositeTagData) ! { ! super(tagData,compositeTagData); ! ! this.formURL = compositeTagData.getStartTag().getAttribute("ACTION"); ! this.formName = compositeTagData.getStartTag().getAttribute("NAME"); ! this.formMethod = compositeTagData.getStartTag().getAttribute("METHOD"); ! this.formInputList = compositeTagData.getChildren().searchFor(InputTag.class, true); ! this.textAreaList = compositeTagData.getChildren().searchFor(TextareaTag.class, true); ! } ! ! /** * Get the list of input fields. ! * @return Input elements in the form. ! */ ! public NodeList getFormInputs() { ! return formInputList; ! } ! ! /** * Get the list of text areas. ! * @return Textarea elements in the form ! */ ! public NodeList getFormTextareas() { ! return textAreaList; ! } ! ! /** ! * @return String The url of the form ! */ ! public String getFormLocation() ! { ! return formURL; ! } ! ! /** ! * Returns the method of the form ! * @return String The method of the form (GET if nothing is specified) ! */ ! public String getFormMethod() { ! if(formMethod==null) ! { ! formMethod = "GET"; ! } ! return formMethod; ! } ! ! /** ! * Get the input tag in the form corresponding to the given name ! * @param name The name of the input tag to be retrieved ! * @return Tag The input tag corresponding to the name provided ! */ ! public InputTag getInputTag(String name) { ! InputTag inputTag=null; ! boolean found=false; ! for (SimpleNodeIterator e = formInputList.elements();e.hasMoreNodes() && !found;) { ! inputTag = (InputTag)e.nextNode(); ! String inputTagName = inputTag.getAttribute("NAME"); ! if (inputTagName!=null && inputTagName.equalsIgnoreCase(name)) { ! found=true; ! } ! } ! if (found) ! return inputTag; else return null; ! } ! ! 
/** ! * @return String The name of the form ! */ ! public String getFormName() { ! return formName; ! } ! ! /** ! * Set the form location. Modification of this element will cause the HTML rendering ! * to change as well (in a call to toHTML()). ! * @param formURL The new FORM location ! */ ! public void setFormLocation(String formURL) { setAttribute ("ACTION", formURL); ! this.formURL = formURL; ! } ! /** ! * @return String The contents of the FormTag ! */ ! public String toString() { ! return "FORM TAG : Form at "+formURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! /** ! * Find the textarea tag matching the given name ! * @param name Name of the textarea tag to be found within the form ! */ ! public TextareaTag getTextAreaTag(String name) { ! TextareaTag textareaTag=null; ! boolean found = false; ! for (SimpleNodeIterator e=textAreaList.elements();e.hasMoreNodes() && !found;) { ! textareaTag = (TextareaTag)e.nextNode(); ! String textAreaName = textareaTag.getAttribute("NAME"); ! if (textAreaName!=null && textAreaName.equals(name)) { ! found = true; ! } ! } ! if (found) ! return textareaTag; ! else ! return null; ! } ! } Index: FrameSetTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameSetTag.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** FrameSetTag.java 24 Aug 2003 21:59:42 -0000 1.22 --- FrameSetTag.java 3 Sep 2003 23:36:20 -0000 1.23 *************** *** 39,95 **** public class FrameSetTag extends CompositeTag { ! /** ! * The URL where the image is stored. ! */ ! protected String frameURL; ! protected String frameName; ! protected NodeList frames; ! public FrameSetTag(TagData tagData,CompositeTagData compositeTagData) { ! super(tagData,compositeTagData); ! this.frames = compositeTagData.getChildren(); ! } ! ! /** ! * Returns the location of the frame ! */ ! public String getFrameLocation() { ! 
return frameURL; ! } ! ! public String getFrameName() { ! return frameName; ! } ! ! /** ! * Print the contents of the HTMLImageNode ! */ ! public String toString() { ! return "FRAME TAG : Image at "+frameURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! ! /** ! * Returns the frames. ! * @return Vector ! */ ! public NodeList getFrames() { ! return frames; ! } ! public FrameTag getFrame(String frameName) { ! boolean found = false; ! FrameTag frameTag=null; ! for (SimpleNodeIterator e=frames.elements();e.hasMoreNodes() && !found;) { ! frameTag = (FrameTag)e.nextNode(); ! if (frameTag.getFrameName().toUpperCase().equals(frameName.toUpperCase())) found = true; ! } ! if (found) ! return frameTag; else return null; ! } ! /** ! * Sets the frames. ! * @param frames The frames to set ! */ ! public void setFrames(NodeList frames) { ! this.frames = frames; ! } } --- 39,95 ---- public class FrameSetTag extends CompositeTag { ! /** ! * The URL where the image is stored. ! */ ! protected String frameURL; ! protected String frameName; ! protected NodeList frames; ! public FrameSetTag(TagData tagData,CompositeTagData compositeTagData) { ! super(tagData,compositeTagData); ! this.frames = compositeTagData.getChildren(); ! } ! ! /** ! * Returns the location of the frame ! */ ! public String getFrameLocation() { ! return frameURL; ! } ! ! public String getFrameName() { ! return frameName; ! } ! ! /** ! * Print the contents of the HTMLImageNode ! */ ! public String toString() { ! return "FRAME TAG : Image at "+frameURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! ! /** ! * Returns the frames. ! * @return Vector ! */ ! public NodeList getFrames() { ! return frames; ! } ! public FrameTag getFrame(String frameName) { ! boolean found = false; ! FrameTag frameTag=null; ! for (SimpleNodeIterator e=frames.elements();e.hasMoreNodes() && !found;) { ! frameTag = (FrameTag)e.nextNode(); ! 
if (frameTag.getFrameName().toUpperCase().equals(frameName.toUpperCase())) found = true; ! } ! if (found) ! return frameTag; else return null; ! } ! /** ! * Sets the frames. ! * @param frames The frames to set ! */ ! public void setFrames(NodeList frames) { ! this.frames = frames; ! } } Index: FrameTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameTag.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** FrameTag.java 24 Aug 2003 21:59:42 -0000 1.22 --- FrameTag.java 3 Sep 2003 23:36:20 -0000 1.23 *************** *** 36,63 **** public class FrameTag extends Tag { ! /** ! * The URL where the image is stored. ! */ ! protected String frameURL; protected String frameName; ! public FrameTag(TagData tagData, String frameURL,String frameName) { ! super(tagData); ! this.frameURL = frameURL; this.frameName = frameName; ! } ! /** ! * Returns the location of the image ! */ ! public String getFrameLocation() { ! return frameURL; ! } ! public String getFrameName() { ! return frameName; ! } ! /** ! * Print the contents of the HTMLFrameTag ! */ ! public String toString() { ! return "FRAME TAG : Image at "+frameURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } } --- 36,63 ---- public class FrameTag extends Tag { ! /** ! * The URL where the image is stored. ! */ ! protected String frameURL; protected String frameName; ! public FrameTag(TagData tagData, String frameURL,String frameName) { ! super(tagData); ! this.frameURL = frameURL; this.frameName = frameName; ! } ! /** ! * Returns the location of the image ! */ ! public String getFrameLocation() { ! return frameURL; ! } ! public String getFrameName() { ! return frameName; ! } ! /** ! * Print the contents of the HTMLFrameTag ! */ ! public String toString() { ! return "FRAME TAG : Image at "+frameURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! 
} } Index: HeadTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/HeadTag.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** HeadTag.java 24 Aug 2003 21:59:42 -0000 1.11 --- HeadTag.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 1,46 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by Dhaval Udani ! // dha...@or... ! ! package org.htmlparser.tags; ! ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! ! public class HeadTag extends CompositeTag { ! ! public HeadTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! ! public String toString() { ! return "HEAD: " + super.toString(); ! } ! 
} --- 1,46 ---- ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by Dhaval Udani ! // dha...@or... ! ! package org.htmlparser.tags; ! ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! ! public class HeadTag extends CompositeTag { ! ! public HeadTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! ! public String toString() { ! return "HEAD: " + super.toString(); ! } ! 
} Index: Html.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Html.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** Html.java 24 Aug 2003 21:59:42 -0000 1.23 --- Html.java 3 Sep 2003 23:36:20 -0000 1.24 *************** *** 34,40 **** public class Html extends CompositeTag { ! public Html(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! } --- 34,40 ---- public class Html extends CompositeTag { ! public Html(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! } Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** ImageTag.java 24 Aug 2003 21:59:42 -0000 1.23 --- ImageTag.java 3 Sep 2003 23:36:20 -0000 1.24 *************** *** 37,77 **** public class ImageTag extends Tag { ! public static final String IMAGE_TAG_FILTER="-i"; ! /** ! * The URL where the image is stored. ! */ ! protected String imageURL; /** ! * Constructor creates an HTMLImageNode object, which stores the location ! * where the image is to be found. * @param tagData Specifies character position and content of the tag. ! * @param imageURL Location of the image. ! */ ! public ImageTag(TagData tagData,String imageURL) ! { ! super(tagData); ! this.imageURL = imageURL; ! } ! /** ! * Returns the location of the image ! */ ! public String getImageURL() ! { ! return imageURL; ! } ! public String toString() ! { ! return "IMAGE TAG : Image at "+imageURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! public void setImageURL(String imageURL) { ! this.imageURL = imageURL; setAttribute ("SRC", imageURL); ! } ! public void accept(NodeVisitor visitor) { ! visitor.visitImageTag(this); ! 
} } --- 37,77 ---- public class ImageTag extends Tag { ! public static final String IMAGE_TAG_FILTER="-i"; ! /** ! * The URL where the image is stored. ! */ ! protected String imageURL; /** ! * Constructor creates an HTMLImageNode object, which stores the location ! * where the image is to be found. * @param tagData Specifies character position and content of the tag. ! * @param imageURL Location of the image. ! */ ! public ImageTag(TagData tagData,String imageURL) ! { ! super(tagData); ! this.imageURL = imageURL; ! } ! /** ! * Returns the location of the image ! */ ! public String getImageURL() ! { ! return imageURL; ! } ! public String toString() ! { ! return "IMAGE TAG : Image at "+imageURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! public void setImageURL(String imageURL) { ! this.imageURL = imageURL; setAttribute ("SRC", imageURL); ! } ! public void accept(NodeVisitor visitor) { ! visitor.visitImageTag(this); ! } } Index: InputTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/InputTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** InputTag.java 24 Aug 2003 21:59:42 -0000 1.23 --- InputTag.java 3 Sep 2003 23:36:20 -0000 1.24 *************** *** 34,43 **** public class InputTag extends Tag { ! public InputTag(TagData tagData) { ! super(tagData); ! } ! ! public String toString() { ! return (ParserUtils.toString(this)); ! } } --- 34,43 ---- public class InputTag extends Tag { ! public InputTag(TagData tagData) { ! super(tagData); ! } ! ! public String toString() { ! return (ParserUtils.toString(this)); ! 
} } Index: JspTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/JspTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** JspTag.java 24 Aug 2003 21:59:42 -0000 1.25 --- JspTag.java 3 Sep 2003 23:36:20 -0000 1.26 *************** *** 36,59 **** public class JspTag extends Tag { ! /** ! * The HTMLJspTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param tagData The data for this tag. ! */ ! public JspTag(TagData tagData) ! { ! super(tagData); ! } ! ! public String toHtml() { ! return "<%"+tagContents+"%>"; ! } ! ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { ! return "JSP/ASP Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } } --- 36,59 ---- public class JspTag extends Tag { ! /** ! * The HTMLJspTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param tagData The data for this tag. ! */ ! public JspTag(TagData tagData) ! { ! super(tagData); ! } ! ! public String toHtml() { ! return "<%"+tagContents+"%>"; ! } ! ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { ! return "JSP/ASP Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } } Index: LabelTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LabelTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** LabelTag.java 24 Aug 2003 21:59:42 -0000 1.24 --- LabelTag.java 3 Sep 2003 23:36:20 -0000 1.25 *************** *** 37,50 **** public class LabelTag extends CompositeTag { ! public LabelTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! ! public String getLabel() { ! return toPlainTextString(); ! 
} ! ! public String toString() { ! return "LABEL: "+getLabel(); ! } } --- 37,50 ---- public class LabelTag extends CompositeTag { ! public LabelTag(TagData tagData, CompositeTagData compositeTagData) { ! super(tagData, compositeTagData); ! } ! ! public String getLabel() { ! return toPlainTextString(); ! } ! ! public String toString() { ! return "LABEL: "+getLabel(); ! } } Index: MetaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/MetaTag.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** MetaTag.java 24 Aug 2003 21:59:42 -0000 1.23 --- MetaTag.java 3 Sep 2003 23:36:20 -0000 1.24 *************** *** 35,71 **** */ public class MetaTag extends Tag { ! private String metaTagName; ! private String metaTagContents; ! private String httpEquiv; ! public MetaTag(TagData tagData, String httpEquiv, String metaTagName,String metaTagContents) { ! super(tagData); ! this.httpEquiv = httpEquiv; ! this.metaTagName = metaTagName; ! this.metaTagContents = metaTagContents; ! } ! public String getHttpEquiv() { ! return httpEquiv; ! } ! public String getMetaContent() { ! return metaTagContents; ! } ! public String getMetaTagName() { ! return metaTagName; ! } ! public void setHttpEquiv(String httpEquiv) { ! this.httpEquiv = httpEquiv; ! } ! public void setMetaTagContents(String metaTagContents) { ! this.metaTagContents = metaTagContents; ! } ! public void setMetaTagName(String metaTagName) { ! this.metaTagName = metaTagName; ! } ! public String toString() { ! return "META TAG\n"+ ! "--------\n"+ ! "Http-Equiv : "+getHttpEquiv()+"\n"+ ! "Name : "+metaTagName+"\n"+ ! "Contents : "+metaTagContents+"\n"; ! } } --- 35,71 ---- */ public class MetaTag extends Tag { ! private String metaTagName; ! private String metaTagContents; ! private String httpEquiv; ! public MetaTag(TagData tagData, String httpEquiv, String metaTagName,String metaTagContents) { ! 
super(tagData); ! this.httpEquiv = httpEquiv; ! this.metaTagName = metaTagName; ! this.metaTagContents = metaTagContents; ! } ! public String getHttpEquiv() { ! return httpEquiv; ! } ! public String getMetaContent() { ! return metaTagContents; ! } ! public String getMetaTagName() { ! return metaTagName; ! } ! public void setHttpEquiv(String httpEquiv) { ! this.httpEquiv = httpEquiv; ! } ! public void setMetaTagContents(String metaTagContents) { ! this.metaTagContents = metaTagContents; ! } ! public void setMetaTagName(String metaTagName) { ! this.metaTagName = metaTagName; ! } ! public String toString() { ! return "META TAG\n"+ ! "--------\n"+ ! "Http-Equiv : "+getHttpEquiv()+"\n"+ ! "Name : "+metaTagName+"\n"+ ! "Contents : "+metaTagContents+"\n"; ! } } Index: OptionTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/OptionTag.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** OptionTag.java 24 Aug 2003... [truncated message content] |
From: <der...@us...> - 2003-09-03 23:36:55
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests Modified Files: AllTests.java AssertXmlEqualsTest.java BadTagIdentifier.java FunctionalTests.java InstanceofPerformanceTest.java LineNumberAssignedByNodeReaderTest.java ParserTest.java ParserTestCase.java PerformanceTest.java Log Message: Change tabs to spaces in all source files. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AllTests.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** AllTests.java 24 Aug 2003 21:59:43 -0000 1.47 --- AllTests.java 3 Sep 2003 23:36:20 -0000 1.48 *************** *** 34,40 **** { ! public AllTests(String name) { ! super(name); ! } /** --- 34,40 ---- { ! public AllTests(String name) { ! super(name); ! } /** *************** *** 49,53 **** * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; --- 49,53 ---- * All other options are passed on to the junit framework. */ ! public static void main(String[] args) { String runner; *************** *** 99,119 **** + ")"); } ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("HTMLParser Tests"); ! TestSuite basic = new TestSuite("Basic Tests"); ! basic.addTestSuite(ParserTest.class); suite.addTest(basic); ! suite.addTest(org.htmlparser.tests.scannersTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.utilTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.tagTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.visitorsTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.parserHelperTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.nodeDecoratorTests.AllTests.suite()); ! suite.addTest(AssertXmlEqualsTest.suite()); ! suite.addTest(LineNumberAssignedByNodeReaderTest.suite()); ! return suite; ! 
} } --- 99,119 ---- + ")"); } ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("HTMLParser Tests"); ! TestSuite basic = new TestSuite("Basic Tests"); ! basic.addTestSuite(ParserTest.class); suite.addTest(basic); ! suite.addTest(org.htmlparser.tests.scannersTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.utilTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.tagTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.visitorsTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.parserHelperTests.AllTests.suite()); ! suite.addTest(org.htmlparser.tests.nodeDecoratorTests.AllTests.suite()); ! suite.addTest(AssertXmlEqualsTest.suite()); ! suite.addTest(LineNumberAssignedByNodeReaderTest.suite()); ! return suite; ! } } Index: AssertXmlEqualsTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AssertXmlEqualsTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** AssertXmlEqualsTest.java 24 Aug 2003 21:59:43 -0000 1.9 --- AssertXmlEqualsTest.java 3 Sep 2003 23:36:20 -0000 1.10 *************** *** 34,81 **** public class AssertXmlEqualsTest extends ParserTestCase { ! public AssertXmlEqualsTest(String name) { ! super(name); ! } ! ! public void testNestedTagWithText() throws Exception { ! assertXmlEquals("nested with text","<hello> <hi>My name is Nothing</hi></hello>","<hello><hi>My name is Nothing</hi> </hello>"); ! } ! ! public void testThreeTagsDifferent() throws Exception { ! assertXmlEquals("two tags different","<someTag></someTag><someOtherTag>","<someTag/><someOtherTag>"); ! } ! ! public void testOneTag() throws Exception { ! assertXmlEquals("one tag","<someTag>","<someTag>"); ! } ! public void testTwoTags() throws Exception { ! assertXmlEquals("two tags","<someTag></someTag>","<someTag></someTag>"); ! } ! public void testTwoTagsDifferent() throws Exception { ! 
assertXmlEquals("two tags different","<someTag></someTag>","<someTag/>"); ! } ! ! public void testTwoTagsDifferent2() throws Exception { ! assertXmlEquals("two tags different","<someTag/>","<someTag></someTag>"); ! } ! ! public void testTwoTagsWithSameAttributes() throws Exception { ! assertXmlEquals("attributes","<tag name=\"John\" age=\"22\" sex=\"M\"/>","<tag sex=\"M\" name=\"John\" age=\"22\"/>"); ! } ! ! public void testTagWithText() throws Exception { ! assertXmlEquals("text","<hello> My name is Nothing</hello>","<hello>My name is Nothing </hello>"); ! } ! ! public void testStringWithLineBreaks() throws Exception { ! assertXmlEquals("string with line breaks","testing & refactoring","testing &\nrefactoring"); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("XML Tests"); suite.addTestSuite(AssertXmlEqualsTest.class); return (suite); ! } } --- 34,81 ---- public class AssertXmlEqualsTest extends ParserTestCase { ! public AssertXmlEqualsTest(String name) { ! super(name); ! } ! ! public void testNestedTagWithText() throws Exception { ! assertXmlEquals("nested with text","<hello> <hi>My name is Nothing</hi></hello>","<hello><hi>My name is Nothing</hi> </hello>"); ! } ! ! public void testThreeTagsDifferent() throws Exception { ! assertXmlEquals("two tags different","<someTag></someTag><someOtherTag>","<someTag/><someOtherTag>"); ! } ! ! public void testOneTag() throws Exception { ! assertXmlEquals("one tag","<someTag>","<someTag>"); ! } ! public void testTwoTags() throws Exception { ! assertXmlEquals("two tags","<someTag></someTag>","<someTag></someTag>"); ! } ! public void testTwoTagsDifferent() throws Exception { ! assertXmlEquals("two tags different","<someTag></someTag>","<someTag/>"); ! } ! ! public void testTwoTagsDifferent2() throws Exception { ! assertXmlEquals("two tags different","<someTag/>","<someTag></someTag>"); ! } ! ! public void testTwoTagsWithSameAttributes() throws Exception { ! 
assertXmlEquals("attributes","<tag name=\"John\" age=\"22\" sex=\"M\"/>","<tag sex=\"M\" name=\"John\" age=\"22\"/>"); ! } ! ! public void testTagWithText() throws Exception { ! assertXmlEquals("text","<hello> My name is Nothing</hello>","<hello>My name is Nothing </hello>"); ! } ! ! public void testStringWithLineBreaks() throws Exception { ! assertXmlEquals("string with line breaks","testing & refactoring","testing &\nrefactoring"); ! } ! ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("XML Tests"); suite.addTestSuite(AssertXmlEqualsTest.class); return (suite); ! } } Index: BadTagIdentifier.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/BadTagIdentifier.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** BadTagIdentifier.java 24 Aug 2003 21:59:43 -0000 1.9 --- BadTagIdentifier.java 3 Sep 2003 23:36:20 -0000 1.10 *************** *** 34,67 **** public class BadTagIdentifier { ! public BadTagIdentifier() { ! super(); ! } ! public static void main(String[] args) ! throws Exception { ! BadTagIdentifier badTags = ! new BadTagIdentifier(); ! badTags.identify("http://www.amazon.com"); ! } ! ! private void identify(String url) ! throws Exception{ ! String [] tagsBeingChecked = ! {"TABLE","DIV","SPAN"}; ! ! Parser parser = ! new Parser(url); ! TagFindingVisitor tagFinder = ! new TagFindingVisitor(tagsBeingChecked, true); ! parser.visitAllNodesWith(tagFinder); ! for (int i=0;i<tagsBeingChecked.length;i++) { ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" begin tags = "+ ! tagFinder.getTagCount(i)); ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" end tags = "+ ! tagFinder.getEndTagCount(i)); ! } ! ! } } --- 34,67 ---- public class BadTagIdentifier { ! public BadTagIdentifier() { ! super(); ! } ! public static void main(String[] args) ! throws Exception { ! BadTagIdentifier badTags = ! new BadTagIdentifier(); ! 
badTags.identify("http://www.amazon.com"); ! } ! ! private void identify(String url) ! throws Exception{ ! String [] tagsBeingChecked = ! {"TABLE","DIV","SPAN"}; ! ! Parser parser = ! new Parser(url); ! TagFindingVisitor tagFinder = ! new TagFindingVisitor(tagsBeingChecked, true); ! parser.visitAllNodesWith(tagFinder); ! for (int i=0;i<tagsBeingChecked.length;i++) { ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" begin tags = "+ ! tagFinder.getTagCount(i)); ! System.out.println( ! "Number of "+tagsBeingChecked[i]+" end tags = "+ ! tagFinder.getEndTagCount(i)); ! } ! ! } } Index: FunctionalTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/FunctionalTests.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** FunctionalTests.java 24 Aug 2003 21:59:43 -0000 1.40 --- FunctionalTests.java 3 Sep 2003 23:36:20 -0000 1.41 *************** *** 50,133 **** public class FunctionalTests extends TestCase { ! public FunctionalTests(String arg0) { ! super(arg0); ! } ! /** ! * Based on a suspected bug report by Annette Doyle, ! * to check if the no of image tags are correctly ! * identified by the parser ! */ ! public void testNumImageTagsInYahooWithoutRegisteringScanners() throws ParserException { ! // First count the image tags as is ! int imgTagCount; ! imgTagCount = findImageTagCount(); ! try { ! int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); ! } ! catch (ParserException e) { ! throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); ! } ! ! } ! public int findImageTagCount() { ! int imgTagCount = 0; ! try { ! URL url = new URL("http://www.yahoo.com"); ! InputStream is = url.openStream(); ! BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); ! imgTagCount = countImageTagsWithoutHTMLParser(reader); ! 
is.close(); ! } ! catch (MalformedURLException e) { ! System.err.println("URL was malformed!"); ! } ! catch (IOException e) { ! System.err.println("IO Exception occurred while trying to open stream"); ! } ! return imgTagCount; ! } ! public int countImageTagsWithHTMLParser() throws ParserException { ! Parser parser = new Parser("http://www.yahoo.com",new DefaultParserFeedback()); ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! int parserImgTagCount = 0; ! Node node; ! for (NodeIterator e= parser.elements();e.hasMoreNodes();) { ! node = (Node)e.nextNode(); ! if (node instanceof ImageTag) { ! parserImgTagCount++; ! } ! } ! return parserImgTagCount; ! } ! public int countImageTagsWithoutHTMLParser(BufferedReader reader) throws IOException { ! String line; ! int imgTagCount = 0; ! do { ! line = reader.readLine(); ! if (line!=null) { ! // Check the line for image tags ! String newline = line.toUpperCase(); ! int fromIndex = -1; ! do { ! fromIndex = newline.indexOf("<IMG",fromIndex+1); ! if (fromIndex!=-1) { ! imgTagCount++; ! } ! } ! while (fromIndex!=-1); ! } ! } ! while (line!=null); ! return imgTagCount; ! } ! public static TestSuite suite() { ! return new TestSuite(FunctionalTests.class); ! } } --- 50,133 ---- public class FunctionalTests extends TestCase { ! public FunctionalTests(String arg0) { ! super(arg0); ! } ! /** ! * Based on a suspected bug report by Annette Doyle, ! * to check if the no of image tags are correctly ! * identified by the parser ! */ ! public void testNumImageTagsInYahooWithoutRegisteringScanners() throws ParserException { ! // First count the image tags as is ! int imgTagCount; ! imgTagCount = findImageTagCount(); ! try { ! int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); ! } ! catch (ParserException e) { ! throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); ! } ! ! } ! public int findImageTagCount() { ! 
int imgTagCount = 0; ! try { ! URL url = new URL("http://www.yahoo.com"); ! InputStream is = url.openStream(); ! BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); ! imgTagCount = countImageTagsWithoutHTMLParser(reader); ! is.close(); ! } ! catch (MalformedURLException e) { ! System.err.println("URL was malformed!"); ! } ! catch (IOException e) { ! System.err.println("IO Exception occurred while trying to open stream"); ! } ! return imgTagCount; ! } ! public int countImageTagsWithHTMLParser() throws ParserException { ! Parser parser = new Parser("http://www.yahoo.com",new DefaultParserFeedback()); ! parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! int parserImgTagCount = 0; ! Node node; ! for (NodeIterator e= parser.elements();e.hasMoreNodes();) { ! node = (Node)e.nextNode(); ! if (node instanceof ImageTag) { ! parserImgTagCount++; ! } ! } ! return parserImgTagCount; ! } ! public int countImageTagsWithoutHTMLParser(BufferedReader reader) throws IOException { ! String line; ! int imgTagCount = 0; ! do { ! line = reader.readLine(); ! if (line!=null) { ! // Check the line for image tags ! String newline = line.toUpperCase(); ! int fromIndex = -1; ! do { ! fromIndex = newline.indexOf("<IMG",fromIndex+1); ! if (fromIndex!=-1) { ! imgTagCount++; ! } ! } ! while (fromIndex!=-1); ! } ! } ! while (line!=null); ! return imgTagCount; ! } ! public static TestSuite suite() { ! return new TestSuite(FunctionalTests.class); ! } } Index: InstanceofPerformanceTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/InstanceofPerformanceTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** InstanceofPerformanceTest.java 24 Aug 2003 21:59:43 -0000 1.11 --- InstanceofPerformanceTest.java 3 Sep 2003 23:36:20 -0000 1.12 *************** *** 40,108 **** public class InstanceofPerformanceTest { ! FormTag formTag; ! 
Vector formChildren; ! public void setUp() throws Exception { ! Parser parser = ! Parser.createParser( ! FormScannerTest.FORM_HTML ! ); ! parser.registerScanners(); ! NodeIterator e = parser.elements(); ! Node node = e.nextNode(); ! formTag = (FormTag)node; ! formChildren = new Vector(); ! for (SimpleNodeIterator se = formTag.children();se.hasMoreNodes();) { ! formChildren.addElement(se.nextNode()); ! } ! } ! ! public void doInstanceofTest(long [] time,int index, long numTimes) { ! System.out.println("doInstanceofTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (Enumeration e = formChildren.elements();e.hasMoreElements();) { ! Node node = (Node)e.nextElement(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! ! public void doGetTypeTest(long [] time,int index, long numTimes) { ! System.out.println("doGetTypeTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (SimpleNodeIterator e = formTag.children();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! public void perform() { ! int numTimes = 30; ! long time1[] = new long[numTimes], ! time2[] = new long[numTimes]; ! ! for (int i=0;i<numTimes;i++) ! doInstanceofTest(time1,i,i*10000); ! ! for (int i=0;i<numTimes;i++) ! doGetTypeTest(time2,i,i*10000); ! ! print(time1,time2); ! } ! public void print(long [] time1, long [] time2) { ! for (int i=0;i<time1.length;i++) { ! System.out.println(i*1000000+":"+","+time1[i]+" "+time2[i]); ! } ! } ! public static void main(String [] args) throws Exception { ! InstanceofPerformanceTest test = ! new InstanceofPerformanceTest(); ! test.setUp(); ! test.perform(); ! } } --- 40,108 ---- public class InstanceofPerformanceTest { ! FormTag formTag; ! Vector formChildren; ! public void setUp() throws Exception { ! Parser parser = ! Parser.createParser( ! 
FormScannerTest.FORM_HTML ! ); ! parser.registerScanners(); ! NodeIterator e = parser.elements(); ! Node node = e.nextNode(); ! formTag = (FormTag)node; ! formChildren = new Vector(); ! for (SimpleNodeIterator se = formTag.children();se.hasMoreNodes();) { ! formChildren.addElement(se.nextNode()); ! } ! } ! ! public void doInstanceofTest(long [] time,int index, long numTimes) { ! System.out.println("doInstanceofTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (Enumeration e = formChildren.elements();e.hasMoreElements();) { ! Node node = (Node)e.nextElement(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! ! public void doGetTypeTest(long [] time,int index, long numTimes) { ! System.out.println("doGetTypeTest("+index+")"); ! long start = System.currentTimeMillis(); ! for (long i=0;i<numTimes;i++) { ! for (SimpleNodeIterator e = formTag.children();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! } ! } ! long end = System.currentTimeMillis(); ! time[index] = end-start; ! } ! public void perform() { ! int numTimes = 30; ! long time1[] = new long[numTimes], ! time2[] = new long[numTimes]; ! ! for (int i=0;i<numTimes;i++) ! doInstanceofTest(time1,i,i*10000); ! ! for (int i=0;i<numTimes;i++) ! doGetTypeTest(time2,i,i*10000); ! ! print(time1,time2); ! } ! public void print(long [] time1, long [] time2) { ! for (int i=0;i<time1.length;i++) { ! System.out.println(i*1000000+":"+","+time1[i]+" "+time2[i]); ! } ! } ! public static void main(String [] args) throws Exception { ! InstanceofPerformanceTest test = ! new InstanceofPerformanceTest(); ! test.setUp(); ! test.perform(); ! 
} } Index: LineNumberAssignedByNodeReaderTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/LineNumberAssignedByNodeReaderTest.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** LineNumberAssignedByNodeReaderTest.java 24 Aug 2003 21:59:43 -0000 1.18 --- LineNumberAssignedByNodeReaderTest.java 3 Sep 2003 23:36:20 -0000 1.19 *************** *** 46,123 **** public class LineNumberAssignedByNodeReaderTest extends ParserTestCase { ! public LineNumberAssignedByNodeReaderTest(String name) { ! super(name); ! } ! ! /** ! * Test to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. ! * @throws ParserException if there is a problem parsing the test data ! */ ! public void testLineNumbers() throws ParserException { ! testLineNumber("<Custom/>", 1, 0, 1, 1); ! testLineNumber("<Custom />", 1, 0, 1, 1); ! testLineNumber("<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content</Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber( ! "<Custom>\n" + ! " Content\n" + ! "</Custom>", ! 1, 0, 1, 3 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " Content\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! char[] oneHundredNewLines = new char[100]; ! Arrays.fill(oneHundredNewLines, '\n'); ! testLineNumber( ! "Foo\n" + ! new String(oneHundredNewLines) + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 102, 104 ! ); ! } ! ! /** ! * Helper method to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! 
* information in the <code>TagData</code> it is constructed with. ! * @param xml String containing HTML or XML to parse, containing a Custom tag ! * @param numNodes int number of expected nodes returned by parser ! * @param useNode int index of the node to test (should be of type CustomTag) ! * @param startLine int the expected start line number of the tag ! * @param endLine int the expected end line number of the tag ! * @throws ParserException if there is an exception during parsing ! */ ! private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { ! createParser(xml); ! parser.addScanner(new CustomScanner()); ! parseAndAssertNodeCount(numNodes); ! assertType("custom node",CustomTag.class,node[useNode]); ! CustomTag tag = (CustomTag)node[useNode]; ! assertEquals("start line", expectedStartLine, tag.tagData.getStartLine()); ! assertEquals("end line", expectedEndLine, tag.tagData.getEndLine()); ! ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Line Number Tests"); suite.addTestSuite(LineNumberAssignedByNodeReaderTest.class); return (suite); ! } } --- 46,123 ---- public class LineNumberAssignedByNodeReaderTest extends ParserTestCase { ! public LineNumberAssignedByNodeReaderTest(String name) { ! super(name); ! } ! ! /** ! * Test to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. ! * @throws ParserException if there is a problem parsing the test data ! */ ! public void testLineNumbers() throws ParserException { ! testLineNumber("<Custom/>", 1, 0, 1, 1); ! testLineNumber("<Custom />", 1, 0, 1, 1); ! testLineNumber("<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content</Custom>", 1, 0, 1, 1); ! testLineNumber("<Custom>Content<Custom></Custom>", 1, 0, 1, 1); ! testLineNumber( ! "<Custom>\n" + ! " Content\n" + ! 
"</Custom>", ! 1, 0, 1, 3 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " Content\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! testLineNumber( ! "Foo\n" + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 2, 4 ! ); ! char[] oneHundredNewLines = new char[100]; ! Arrays.fill(oneHundredNewLines, '\n'); ! testLineNumber( ! "Foo\n" + ! new String(oneHundredNewLines) + ! "<Custom>\n" + ! " <Custom>SubContent</Custom>\n" + ! "</Custom>", ! 2, 1, 102, 104 ! ); ! } ! ! /** ! * Helper method to ensure that the <code>Tag</code> being created by the ! * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. ! * @param xml String containing HTML or XML to parse, containing a Custom tag ! * @param numNodes int number of expected nodes returned by parser ! * @param useNode int index of the node to test (should be of type CustomTag) ! * @param startLine int the expected start line number of the tag ! * @param endLine int the expected end line number of the tag ! * @throws ParserException if there is an exception during parsing ! */ ! private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { ! createParser(xml); ! parser.addScanner(new CustomScanner()); ! parseAndAssertNodeCount(numNodes); ! assertType("custom node",CustomTag.class,node[useNode]); ! CustomTag tag = (CustomTag)node[useNode]; ! assertEquals("start line", expectedStartLine, tag.tagData.getStartLine()); ! assertEquals("end line", expectedEndLine, tag.tagData.getEndLine()); ! ! } ! public static TestSuite suite() { ! TestSuite suite = new TestSuite("Line Number Tests"); suite.addTestSuite(LineNumberAssignedByNodeReaderTest.class); return (suite); ! 
} } Index: ParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** ParserTest.java 24 Aug 2003 21:59:43 -0000 1.38 --- ParserTest.java 3 Sep 2003 23:36:20 -0000 1.39 *************** *** 53,111 **** public class ParserTest extends ParserTestCase { ! public ParserTest(String name) { ! super(name); ! } ! public void testElements() throws Exception { ! StringBuffer hugeData = new StringBuffer(); ! for (int i=0;i<5001;i++) hugeData.append('a'); ! createParser(hugeData.toString()); ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be 1 node identified",1,i); ! // Now try getting the elements again ! // i = 0; ! // reader.reset(); ! // reader.setLineCount(1); ! // reader.setPosInLine(-1); ! // for (HTMLEnumeration e = parser.elements();e.hasMoreNodes();) ! // { ! // node[i++] = e.nextHTMLNode(); ! // } ! // assertEquals("There should be 1 node identified (second call to parser.elements())",1,i); ! } ! /** ! * This testcase needs you to be online. ! */ ! public void testElementsFromWeb() throws Exception { ! Parser parser; ! try { ! parser = new Parser("http://www.google.com"); ! } ! catch (Exception e ){ ! throw new ParserException("You must be offline! This test needs you to be connected to the internet.",e); ! } ! parser.getReader().mark(5000); ! Node [] node = new AbstractNode[500]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! int cnt = i; ! parser.getReader().reset(); ! // Now try getting the elements again ! i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be "+cnt+" nodes identified (second call to parser.elements())",cnt,i); ! } ! 
/** * Test the Parser(URLConnection) constructor. --- 53,111 ---- public class ParserTest extends ParserTestCase { ! public ParserTest(String name) { ! super(name); ! } ! public void testElements() throws Exception { ! StringBuffer hugeData = new StringBuffer(); ! for (int i=0;i<5001;i++) hugeData.append('a'); ! createParser(hugeData.toString()); ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be 1 node identified",1,i); ! // Now try getting the elements again ! // i = 0; ! // reader.reset(); ! // reader.setLineCount(1); ! // reader.setPosInLine(-1); ! // for (HTMLEnumeration e = parser.elements();e.hasMoreNodes();) ! // { ! // node[i++] = e.nextHTMLNode(); ! // } ! // assertEquals("There should be 1 node identified (second call to parser.elements())",1,i); ! } ! /** ! * This testcase needs you to be online. ! */ ! public void testElementsFromWeb() throws Exception { ! Parser parser; ! try { ! parser = new Parser("http://www.google.com"); ! } ! catch (Exception e ){ ! throw new ParserException("You must be offline! This test needs you to be connected to the internet.",e); ! } ! parser.getReader().mark(5000); ! Node [] node = new AbstractNode[500]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! int cnt = i; ! parser.getReader().reset(); ! // Now try getting the elements again ! i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! { ! node[i++] = e.nextNode(); ! } ! assertEquals("There should be "+cnt+" nodes identified (second call to parser.elements())",cnt,i); ! } ! /** * Test the Parser(URLConnection) constructor. *************** *** 114,118 **** * <pre> * <form NAME="SearchQuick" method="POST" action="cp_search_response-e.asp" ! 
* onSubmit="return runSubmit();"> * * <!-- begin test hidden field code --> --- 114,118 ---- * <pre> * <form NAME="SearchQuick" method="POST" action="cp_search_response-e.asp" ! * onSubmit="return runSubmit();"> * * <!-- begin test hidden field code --> *************** *** 190,196 **** * <tr> * <td colspan="2" align="right" nowrap> ! * <input type="image" src="images/bb_submit-e.gif" name="Search" border="0" WIDTH="88" HEIGHT="23"> * &nbsp; <a href="#" onclick="javascript:fClearAllFields();"><img src="images/bb_clear_form-e.gif" name="Clear" border="0" WIDTH="88" HEIGHT="23"></a> ! * </td> * </tr> * </table> --- 190,196 ---- * <tr> * <td colspan="2" align="right" nowrap> ! * <input type="image" src="images/bb_submit-e.gif" name="Search" border="0" WIDTH="88" HEIGHT="23"> * &nbsp; <a href="#" onclick="javascript:fClearAllFields();"><img src="images/bb_clear_form-e.gif" name="Clear" border="0" WIDTH="88" HEIGHT="23"></a> ! * </td> * </tr> * </table> *************** *** 210,214 **** final String postal_code = "K2B 7V4"; ! Parser parser; URL url; HttpURLConnection connection; --- 210,214 ---- final String postal_code = "K2B 7V4"; ! Parser parser; URL url; HttpURLConnection connection; *************** *** 289,302 **** out.print (buffer); out.close (); ! parser = new Parser (connection); ! } ! catch (Exception e) { ! throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e); ! } pass = false; ! for (enumeration = parser.elements (); enumeration.hasMoreNodes ();) ! { node = enumeration.nextNode (); if (node instanceof StringNode) --- 289,302 ---- out.print (buffer); out.close (); ! parser = new Parser (connection); ! } ! catch (Exception e) { ! throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e); ! } pass = false; ! for (enumeration = parser.elements (); enumeration.hasMoreNodes ();) ! 
{ node = enumeration.nextNode (); if (node instanceof StringNode) *************** *** 306,312 **** pass = true; } ! } ! assertTrue("POST operation failed.", pass); ! } /** --- 306,312 ---- pass = true; } ! } ! assertTrue("POST operation failed.", pass); ! } /** *************** *** 368,381 **** public void testHTTPCharset () { ! Parser parser; ! try { ! parser = new Parser("http://www.ibm.com/jp/", Parser.noFeedback); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.ibm.com/jp/"); ! } } --- 368,381 ---- public void testHTTPCharset () { ! Parser parser; ! try { ! parser = new Parser("http://www.ibm.com/jp/", Parser.noFeedback); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.ibm.com/jp/"); ! } } *************** *** 388,405 **** public void testHTMLCharset () { ! Parser parser; NodeIterator enumeration; ! try { ! parser = new Parser("http://www.sony.co.jp", Parser.noFeedback); ! assertEquals("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ()); enumeration = parser.elements(); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.sony.co.jp"); ! } } --- 388,405 ---- public void testHTMLCharset () { ! Parser parser; NodeIterator enumeration; ! try { ! parser = new Parser("http://www.sony.co.jp", Parser.noFeedback); ! assertEquals("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ()); enumeration = parser.elements(); ! assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS")); ! } ! catch (ParserException e) { fail ("could not open http://www.sony.co.jp"); ! 
} } *************** *** 409,424 **** * and bug #699886 can't parse website other than iso-8859-1 */ ! public void testSwitchCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html"; int i; Node[] nodes; ! ! parser = new Parser(url); i = 0; nodes = new AbstractNode[30]; ! for (NodeIterator e = parser.elements(); e.hasMoreNodes();) ! nodes[i++] = e.nextNode(); assertEquals ("Expected nodes", 14, i); } --- 409,424 ---- * and bug #699886 can't parse website other than iso-8859-1 */ ! public void testSwitchCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html"; int i; Node[] nodes; ! ! parser = new Parser(url); i = 0; nodes = new AbstractNode[30]; ! for (NodeIterator e = parser.elements(); e.hasMoreNodes();) ! nodes[i++] = e.nextNode(); assertEquals ("Expected nodes", 14, i); } *************** *** 433,444 **** * Nonetheless, it would be nice to handle this case. */ ! public void testDoubleQuotedCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } --- 433,444 ---- * Nonetheless, it would be nice to handle this case. */ ! public void testDoubleQuotedCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } *************** *** 453,464 **** * Nonetheless, it would be nice to handle this case. */ ! public void testSingleQuotedCharset () throws ParserException { ! Parser parser; ! 
String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } --- 453,464 ---- * Nonetheless, it would be nice to handle this case. */ ! public void testSingleQuotedCharset () throws ParserException { ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html"; ! ! parser = new Parser(url); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) ! e.nextNode(); assertTrue ("Wrong encoding", parser.getEncoding ().equals ("UTF-8")); } *************** *** 472,481 **** * AOL it would be nice to handle this case. */ ! public void testCommaListCharset () throws ParserException { URL url; URLConnection connection; ! Parser parser; ! String idiots = "http://users.aol.com/geinster/rej.htm"; try --- 472,481 ---- * AOL it would be nice to handle this case. */ ! public void testCommaListCharset () throws ParserException { URL url; URLConnection connection; ! Parser parser; ! String idiots = "http://users.aol.com/geinster/rej.htm"; try *************** *** 537,696 **** public void testNullUrl() { ! Parser parser; ! try { ! parser = new Parser("http://someoneexisting.com", Parser.noFeedback); ! assertTrue("Should have thrown an exception!",false); ! } ! catch (ParserException e) { ! ! } ! } ! ! public void testURLWithSpaces() throws ParserException{ ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! ! parser = new Parser(url); ! Node node [] = new AbstractNode[30]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! node[i] = e.nextNode(); ! i++; ! ! } ! assertEquals("Expected nodes",12,i); ! } ! public void testLinkCollection() throws ParserException { ! createParser( ! 
"<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"><title>Google</title><style><!--\n"+ ! "body,td,a,p,.h{font-family:arial,sans-serif;} .h{font-size: 20px;} .h{color:} .q{text-decoration:none; color:#0000cc;}\n"+ ! "//--></style>\n"+ ! "<script>\n"+ ! "<!--\n"+ ! "function sf(){document.f.q.focus();}\n"+ ! "function c(p){var f=document.f;if (f.action) {f.action = 'http://'+p;f.submit();return false;}return true;}\n"+ ! "// -->\n"+ ! "</script>\n"+ ! "</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 cellpadding=0><tr><td><img src=\"images/logo.gif\" width=276 height=110 alt=\"Google\"></td></tr></table><br>\n"+ ! "<table border=0 cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=15> </td>" + ! "<td id=0 bgcolor=#3366cc align=center width=95 nowrap>" + ! "<font color=#ffffff size=-1><b>Web</b></font>" + ! "</td>" + ! "<td width=15> </td>" + ! "<td id=1 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/imghp');\" style=cursor:pointer;cursor:hand;><a id=1a class=q href=\"/imghp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/imghp');\"><font size=-1>Images</font></a></td><td width=15> </td><td id=2 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/grphp');\" style=cursor:pointer;cursor:hand;><a id=2a class=q href=\"/grphp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/grphp');\"><font size=-1>Groups</font></a></td><td width=15> </td><td id=3 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/dirhp');\" style=cursor:pointer;cursor:hand;><a id=3a class=q href=\"/dirhp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/dirhp');\"><font size=-1>Directory</font></a></td><td width=15> </td><td id=4 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/nwshp');\" style=cursor:pointer;cursor:hand;><a id=4a 
class=q href=\"/nwshp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/nwshp');\"><font size=-1><nobr>News-<font color=red>New!</font></nobr></font></a></td><td width=15> </td></tr><tr><td colspan=12 bgcolor=#3366cc><img width=1 height=1 alt=\"\">" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "<br>" + ! "<form action=\"/search\" name=f>" + ! "<table cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=75> </td>" + ! "<td align=center>" + ! "<input type=hidden name=hl value=en>" + ! "<input type=hidden name=ie value=\"UTF-8\">" + ! "<input type=hidden name=oe value=\"UTF-8\">" + ! "<input maxLength=256 size=55 name=q value=\"\"><br>" + ! "<input type=submit value=\"Google Search\" name=btnG>" + ! "<input type=submit value=\"I'm Feeling Lucky\" name=btnI>" + ! "</td>" + ! "<td valign=top nowrap>" + ! "<font size=-2> • <a href=/advanced_search?hl=en>Advanced Search</a>" + ! "<br> • <a href=/preferences?hl=en>Preferences</a>" + ! "<br> • <a href=/language_tools?hl=en>Language Tools</a>" + ! "</font>" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</form><br>\n"+ ! "<br><font size=-1><a href=\"/ads/\">Advertise with Us</a> - <a href=\"/services/\">Search Solutions</a> - <a href=\"/options/\">Services & Tools</a> - <a href=/about.html>Jobs, Press, & Help</a><span id=hp style=\"behavior:url(#default#homepage)\"></span>\n"+ ! "<script>\n"+ ! "if (!hp.isHomePage('http://www.google.com/')) {document.write(\"<p><a href=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">Make Google Your Homepage!</a>\");}\n"+ ! "</script></font>\n"+ ! "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ! ); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,LinkTag.class); ! } ! 
assertEquals("Size of collection vector should be 11",11,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only links should have been parsed",node instanceof LinkTag); ! } ! } ! public void testImageCollection() throws ParserException { ! createParser( ! "<html>\n"+ ! "<head>\n"+ ! "<meta name=\"generator\" content=\"Created Using Yahoo! PageBuilder 2.60.24\">\n"+ ! "</head>\n"+ ! "<body bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#FF0000\" text=\"#000000\"\n"+ ! " onLoad=\"window.onresize=new Function('if (navigator.appVersion==\'Netscape\') history.go(0);');\">\n"+ ! "<div id=\"layer0\" style=\"position:absolute;left:218;top:40;width:240;height:26;\">\n"+ ! "<table width=240 height=26 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><b><font size=\"+2\"><span style=\"font-size:24\">NISHI-HONGWAN-JI</span></font></b></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer1\" style=\"position:absolute;left:75;top:88;width:542;height:83;\">\n"+ ! "<table width=542 height=83 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">The Nihi Hongwanj-ji temple is very traditional, very old, and very beautiful. This is the place that we stayed on our first night in Kyoto. We then attended the morning prayer ceremony, at 6:30 am. Staying here costed us 7,500 yen, which was inclusive of dinner and breakfast, and usage of the o-furo (public bath). Felt more like a luxury hotel than a temple.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer2\" style=\"position:absolute;left:144;top:287;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji1.html\"><img height=96 width=128 src=\"nishi-hongwanji1-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! 
"<div id=\"layer3\" style=\"position:absolute;left:415;top:285;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji3.html\"><img height=96 width=128 src=\"nishi-hongwanji2-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer4\" style=\"position:absolute;left:414;top:182;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"higashi-hongwanji.html\"><img height=96 width=128 src=\"higashi-hongwanji-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer5\" style=\"position:absolute;left:78;top:396;width:530;height:49;\">\n"+ ! "<table width=530 height=49 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">Click on the pictures to see the full-sized versions. The picture at the top right corner is taken in Higashi-Hongwanji. Nishi means west, and Higashi means east. These two temples are adjacent to each other and represent two different Buddhist sects.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer6\" style=\"position:absolute;left:143;top:180;width:128;height:102;\">\n"+ ! "<table width=128 height=102 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji4.html\"><img height=102 width=128 src=\"nishi-hongwanji4-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer7\" style=\"position:absolute;left:280;top:235;width:124;height:99;\">\n"+ ! "<table width=124 height=99 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji-lodging.html\"><img height=99 width=124 src=\"nishi-hongwanji-lodging-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "</body>\n"+ ! "</html>"); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! 
for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,ImageTag.IMAGE_TAG_FILTER); ! } ! assertEquals("Size of collection vector should be 5",5,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only images should have been parsed",node instanceof ImageTag); ! } ! } ! public void testRemoveScanner() throws Exception { ! createParser( ! "" ! ); ! parser.registerScanners(); ! parser.removeScanner(new FormScanner("",parser)); ! Map scanners = parser.getScanners(); ! TagScanner scanner = (TagScanner)scanners.get("FORM"); ! assertNull("shouldnt have found scanner",scanner); ! } /** --- 537,696 ---- public void testNullUrl() { ! Parser parser; ! try { ! parser = new Parser("http://someoneexisting.com", Parser.noFeedback); ! assertTrue("Should have thrown an exception!",false); ! } ! catch (ParserException e) { ! ! } ! } ! ! public void testURLWithSpaces() throws ParserException{ ! Parser parser; ! String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! ! parser = new Parser(url); ! Node node [] = new AbstractNode[30]; ! int i = 0; ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! node[i] = e.nextNode(); ! i++; ! ! } ! assertEquals("Expected nodes",12,i); ! } ! public void testLinkCollection() throws ParserException { ! createParser( ! "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"><title>Google</title><style><!--\n"+ ! "body,td,a,p,.h{font-family:arial,sans-serif;} .h{font-size: 20px;} .h{color:} .q{text-decoration:none; color:#0000cc;}\n"+ ! "//--></style>\n"+ ! "<script>\n"+ ! "<!--\n"+ ! "function sf(){document.f.q.focus();}\n"+ ! "function c(p){var f=document.f;if (f.action) {f.action = 'http://'+p;f.submit();return false;}return true;}\n"+ ! "// -->\n"+ ! "</script>\n"+ ! 
"</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 cellpadding=0><tr><td><img src=\"images/logo.gif\" width=276 height=110 alt=\"Google\"></td></tr></table><br>\n"+ ! "<table border=0 cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=15> </td>" + ! "<td id=0 bgcolor=#3366cc align=center width=95 nowrap>" + ! "<font color=#ffffff size=-1><b>Web</b></font>" + ! "</td>" + ! "<td width=15> </td>" + ! "<td id=1 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/imghp');\" style=cursor:pointer;cursor:hand;><a id=1a class=q href=\"/imghp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/imghp');\"><font size=-1>Images</font></a></td><td width=15> </td><td id=2 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/grphp');\" style=cursor:pointer;cursor:hand;><a id=2a class=q href=\"/grphp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/grphp');\"><font size=-1>Groups</font></a></td><td width=15> </td><td id=3 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/dirhp');\" style=cursor:pointer;cursor:hand;><a id=3a class=q href=\"/dirhp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/dirhp');\"><font size=-1>Directory</font></a></td><td width=15> </td><td id=4 bgcolor=#efefef align=center width=95 nowrap onClick=\"return c('www.google.com/nwshp');\" style=cursor:pointer;cursor:hand;><a id=4a class=q href=\"/nwshp?hl=en&ie=UTF-8&oe=UTF-8\" onClick=\"return c('www.google.com/nwshp');\"><font size=-1><nobr>News-<font color=red>New!</font></nobr></font></a></td><td width=15> </td></tr><tr><td colspan=12 bgcolor=#3366cc><img width=1 height=1 alt=\"\">" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "<br>" + ! "<form action=\"/search\" name=f>" + ! "<table cellspacing=0 cellpadding=0>" + ! "<tr>" + ! "<td width=75> </td>" + ! "<td align=center>" + ! 
"<input type=hidden name=hl value=en>" + ! "<input type=hidden name=ie value=\"UTF-8\">" + ! "<input type=hidden name=oe value=\"UTF-8\">" + ! "<input maxLength=256 size=55 name=q value=\"\"><br>" + ! "<input type=submit value=\"Google Search\" name=btnG>" + ! "<input type=submit value=\"I'm Feeling Lucky\" name=btnI>" + ! "</td>" + ! "<td valign=top nowrap>" + ! "<font size=-2> • <a href=/advanced_search?hl=en>Advanced Search</a>" + ! "<br> • <a href=/preferences?hl=en>Preferences</a>" + ! "<br> • <a href=/language_tools?hl=en>Language Tools</a>" + ! "</font>" + ! "</td>" + ! "</tr>" + ! "</table>" + ! "</form><br>\n"+ ! "<br><font size=-1><a href=\"/ads/\">Advertise with Us</a> - <a href=\"/services/\">Search Solutions</a> - <a href=\"/options/\">Services & Tools</a> - <a href=/about.html>Jobs, Press, & Help</a><span id=hp style=\"behavior:url(#default#homepage)\"></span>\n"+ ! "<script>\n"+ ! "if (!hp.isHomePage('http://www.google.com/')) {document.write(\"<p><a href=\"/mgyhp.html\" onClick=\"style.behavior='url(#default#homepage)';setHomePage('http://www.google.com/');\">Make Google Your Homepage!</a>\");}\n"+ ! "</script></font>\n"+ ! "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ! ); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,LinkTag.class); ! } ! assertEquals("Size of collection vector should be 11",11,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only links should have been parsed",node instanceof LinkTag); ! } ! } ! public void testImageCollection() throws ParserException { ! createParser( ! "<html>\n"+ ! "<head>\n"+ ! "<meta name=\"generator\" content=\"Created Using Yahoo! 
PageBuilder 2.60.24\">\n"+ ! "</head>\n"+ ! "<body bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#FF0000\" text=\"#000000\"\n"+ ! " onLoad=\"window.onresize=new Function('if (navigator.appVersion==\'Netscape\') history.go(0);');\">\n"+ ! "<div id=\"layer0\" style=\"position:absolute;left:218;top:40;width:240;height:26;\">\n"+ ! "<table width=240 height=26 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><b><font size=\"+2\"><span style=\"font-size:24\">NISHI-HONGWAN-JI</span></font></b></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer1\" style=\"position:absolute;left:75;top:88;width:542;height:83;\">\n"+ ! "<table width=542 height=83 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">The Nihi Hongwanj-ji temple is very traditional, very old, and very beautiful. This is the place that we stayed on our first night in Kyoto. We then attended the morning prayer ceremony, at 6:30 am. Staying here costed us 7,500 yen, which was inclusive of dinner and breakfast, and usage of the o-furo (public bath). Felt more like a luxury hotel than a temple.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer2\" style=\"position:absolute;left:144;top:287;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji1.html\"><img height=96 width=128 src=\"nishi-hongwanji1-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer3\" style=\"position:absolute;left:415;top:285;width:128;height:96;\">\n"+ ! "<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji3.html\"><img height=96 width=128 src=\"nishi-hongwanji2-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer4\" style=\"position:absolute;left:414;top:182;width:128;height:96;\">\n"+ ! 
"<table width=128 height=96 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"higashi-hongwanji.html\"><img height=96 width=128 src=\"higashi-hongwanji-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer5\" style=\"position:absolute;left:78;top:396;width:530;height:49;\">\n"+ ! "<table width=530 height=49 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><span style=\"font-size:14\">Click on the pictures to see the full-sized versions. The picture at the top right corner is taken in Higashi-Hongwanji. Nishi means west, and Higashi means east. These two temples are adjacent to each other and represent two different Buddhist sects.</span></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer6\" style=\"position:absolute;left:143;top:180;width:128;height:102;\">\n"+ ! "<table width=128 height=102 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji4.html\"><img height=102 width=128 src=\"nishi-hongwanji4-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "<div id=\"layer7\" style=\"position:absolute;left:280;top:235;width:124;height:99;\">\n"+ ! "<table width=124 height=99 border=0 cellpadding=0 cellspacing=0><tr valign=\"top\">\n"+ ! "<td><a href=\"nishi-hongwanji-lodging.html\"><img height=99 width=124 src=\"nishi-hongwanji-lodging-thumb.jpg\" border=0 ></a></td>\n"+ ! "</tr></table></div>\n"+ ! "</body>\n"+ ! "</html>"); ! parser.registerScanners(); ! NodeList collectionList = new NodeList(); ! for (NodeIterator e = parser.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! node.collectInto(collectionList,ImageTag.IMAGE_TAG_FILTER); ! } ! assertEquals("Size of collection vector should be 5",5,collectionList.size()); ! // All items in collection vector should be links ! for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) { ! Node node = e.nextNode(); ! assertTrue("Only images sh... 
[truncated message content] |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv31228/scanners Modified Files: AppletScanner.java BaseHrefScanner.java BodyScanner.java BulletListScanner.java BulletScanner.java CompositeTagScanner.java DivScanner.java DoctypeScanner.java FormScanner.java FrameScanner.java FrameSetScanner.java HeadScanner.java HtmlScanner.java ImageScanner.java InputTagScanner.java JspScanner.java LabelScanner.java LinkScanner.java MetaTagScanner.java OptionTagScanner.java ScriptScanner.java SelectTagScanner.java SpanScanner.java StyleScanner.java TableColumnScanner.java TableRowScanner.java TableScanner.java TagScanner.java TextareaTagScanner.java TitleScanner.java Log Message: Change tabs to spaces in all source files. Index: AppletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/AppletScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** AppletScanner.java 24 Aug 2003 21:59:42 -0000 1.28 --- AppletScanner.java 3 Sep 2003 23:36:19 -0000 1.29 *************** *** 39,61 **** */ public class AppletScanner extends CompositeTagScanner { ! private static String [] MATCH_STRING = {"APPLET"}; ! ! public AppletScanner() { ! super(MATCH_STRING); ! } ! ! public AppletScanner(String filter) { ! super(filter,MATCH_STRING); ! } ! public String [] getID() { ! return MATCH_STRING; ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! ! return new AppletTag(tagData,compositeTagData); ! } } --- 39,61 ---- */ public class AppletScanner extends CompositeTagScanner { ! private static String [] MATCH_STRING = {"APPLET"}; ! ! public AppletScanner() { ! super(MATCH_STRING); ! } ! ! public AppletScanner(String filter) { ! super(filter,MATCH_STRING); ! } ! public String [] getID() { ! return MATCH_STRING; ! } ! 
public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! ! return new AppletTag(tagData,compositeTagData); ! } } Index: BaseHrefScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BaseHrefScanner.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** BaseHrefScanner.java 24 Aug 2003 21:59:42 -0000 1.22 --- BaseHrefScanner.java 3 Sep 2003 23:36:19 -0000 1.23 *************** *** 36,65 **** public class BaseHrefScanner extends TagScanner { ! private LinkProcessor processor; ! public BaseHrefScanner() { ! super(); ! } ! public BaseHrefScanner(String filter,LinkProcessor processor) { ! super(filter); ! this.processor = processor; ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "BASE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String baseUrl = (String)tag.getAttribute("HREF"); ! String absoluteBaseUrl=""; ! if (baseUrl != null && baseUrl.length()>0) { ! absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); ! processor.setBaseUrl(absoluteBaseUrl); ! } ! return new BaseHrefTag(tagData,absoluteBaseUrl); ! } } --- 36,65 ---- public class BaseHrefScanner extends TagScanner { ! private LinkProcessor processor; ! public BaseHrefScanner() { ! super(); ! } ! public BaseHrefScanner(String filter,LinkProcessor processor) { ! super(filter); ! this.processor = processor; ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "BASE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String baseUrl = (String)tag.getAttribute("HREF"); ! String absoluteBaseUrl=""; ! if (baseUrl != null && baseUrl.length()>0) { ! absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); ! processor.setBaseUrl(absoluteBaseUrl); ! } ! 
return new BaseHrefTag(tagData,absoluteBaseUrl); ! } } Index: BodyScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BodyScanner.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** BodyScanner.java 24 Aug 2003 21:59:42 -0000 1.14 --- BodyScanner.java 3 Sep 2003 23:36:19 -0000 1.15 *************** *** 39,63 **** */ public class BodyScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"BODY"}; ! private static final String ENDERS [] = {}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! public BodyScanner() { ! this(""); ! } ! ! public BodyScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new BodyTag(tagData,compositeTagData); ! } } --- 39,63 ---- */ public class BodyScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"BODY"}; ! private static final String ENDERS [] = {}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! public BodyScanner() { ! this(""); ! } ! ! public BodyScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new BodyTag(tagData,compositeTagData); ! 
} } Index: BulletListScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletListScanner.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BulletListScanner.java 24 Aug 2003 21:59:42 -0000 1.13 --- BulletListScanner.java 3 Sep 2003 23:36:19 -0000 1.14 *************** *** 40,68 **** public class BulletListScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = { "UL", "OL" }; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private Stack ulli = new Stack(); ! ! public BulletListScanner(Parser parser) { ! this("",parser); ! } ! public BulletListScanner(String filter, Parser parser) { ! super(filter, MATCH_STRING, ENDERS); ! parser.addScanner(new BulletScanner("-bullet",ulli)); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new BulletList(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! public void beforeScanningStarts() { ! ulli.push(this); ! } } --- 40,68 ---- public class BulletListScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = { "UL", "OL" }; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private Stack ulli = new Stack(); ! ! public BulletListScanner(Parser parser) { ! this("",parser); ! } ! public BulletListScanner(String filter, Parser parser) { ! super(filter, MATCH_STRING, ENDERS); ! parser.addScanner(new BulletScanner("-bullet",ulli)); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new BulletList(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! public void beforeScanningStarts() { ! ulli.push(this); ! 
} } Index: BulletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletScanner.java,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** BulletScanner.java 24 Aug 2003 21:59:42 -0000 1.18 --- BulletScanner.java 3 Sep 2003 23:36:19 -0000 1.19 *************** *** 47,89 **** */ public class BulletScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = {"LI"}; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private final static String END_TAG_ENDERS [] = { "UL" }; ! private Stack ulli; ! ! public BulletScanner(Stack ulli) { ! this("",ulli); ! } ! public BulletScanner(String filter, Stack ulli) { ! super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); ! this.ulli = ulli; ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new Bullet(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! /** ! * This is the logic that decides when a bullet tag can be allowed ! */ ! public boolean shouldCreateEndTagAndExit() { ! if (ulli.size()==0) return false; ! CompositeTagScanner parentScanner = (CompositeTagScanner)ulli.peek(); ! if (parentScanner == this) { ! ulli.pop(); ! return true; ! } else ! return false; ! } ! public void beforeScanningStarts() { ! ulli.push(this); ! } } --- 47,89 ---- */ public class BulletScanner extends CompositeTagScanner { ! private static final String [] MATCH_STRING = {"LI"}; ! private final static String ENDERS [] = { "BODY", "HTML" }; ! private final static String END_TAG_ENDERS [] = { "UL" }; ! private Stack ulli; ! ! public BulletScanner(Stack ulli) { ! this("",ulli); ! } ! public BulletScanner(String filter, Stack ulli) { ! super(filter, MATCH_STRING, ENDERS, END_TAG_ENDERS, false); ! this.ulli = ulli; ! } ! 
public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! return new Bullet(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! ! /** ! * This is the logic that decides when a bullet tag can be allowed ! */ ! public boolean shouldCreateEndTagAndExit() { ! if (ulli.size()==0) return false; ! CompositeTagScanner parentScanner = (CompositeTagScanner)ulli.peek(); ! if (parentScanner == this) { ! ulli.pop(); ! return true; ! } else ! return false; ! } ! public void beforeScanningStarts() { ! ulli.push(this); ! } } Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.63 retrieving revision 1.64 diff -C2 -d -r1.63 -r1.64 *** CompositeTagScanner.java 24 Aug 2003 21:59:42 -0000 1.63 --- CompositeTagScanner.java 3 Sep 2003 23:36:19 -0000 1.64 *************** *** 55,62 **** * MyScanner extends CompositeTagScanner { * private static final String [] MATCH_IDS = { "MYTAG" }; ! * MyScanner() { ! * super(MATCH_IDS); ! * } ! * ... * } * </pre> --- 55,62 ---- * MyScanner extends CompositeTagScanner { * private static final String [] MATCH_IDS = { "MYTAG" }; ! * MyScanner() { ! * super(MATCH_IDS); ! * } ! * ... * } * </pre> *************** *** 69,76 **** * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS, END_TAG_ENDERS, true); ! * } ! * ... * } * </pre> --- 69,76 ---- * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS, END_TAG_ENDERS, true); ! * } ! * ... * } * </pre> *************** *** 84,91 **** * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! 
* MyScanner() { ! * super(MATCH_IDS, ENDERS,END_TAG_ENDERS, false); ! * } ! * ... * } * </pre> --- 84,91 ---- * private static final String [] ENDERS = {}; * private static final String [] END_TAG_ENDERS = { "BODY", "HTML" }; ! * MyScanner() { ! * super(MATCH_IDS, ENDERS,END_TAG_ENDERS, false); ! * } ! * ... * } * </pre> *************** *** 93,136 **** */ public abstract class CompositeTagScanner extends TagScanner { ! protected String [] nameOfTagToMatch; ! private boolean allowSelfChildren; ! protected Set tagEnderSet; ! private Set endTagEnderSet; ! private boolean balance_quotes; ! ! public CompositeTagScanner(String [] nameOfTagToMatch) { ! this(nameOfTagToMatch,new String[] {}); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) { ! this("",nameOfTagToMatch,tagEnders); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) { ! this("",nameOfTagToMatch,tagEnders,allowSelfChildren); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {},true); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,true); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! boolean allowSelfChildren) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,endTagEnders, allowSelfChildren, false); --- 93,136 ---- */ public abstract class CompositeTagScanner extends TagScanner { ! protected String [] nameOfTagToMatch; ! private boolean allowSelfChildren; ! protected Set tagEnderSet; ! private Set endTagEnderSet; ! private boolean balance_quotes; ! ! 
public CompositeTagScanner(String [] nameOfTagToMatch) { ! this(nameOfTagToMatch,new String[] {}); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders) { ! this("",nameOfTagToMatch,tagEnders); ! } ! public CompositeTagScanner(String [] nameOfTagToMatch, String [] tagEnders, boolean allowSelfChildren) { ! this("",nameOfTagToMatch,tagEnders,allowSelfChildren); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch) { ! this(filter,nameOfTagToMatch,new String [] {},true); ! } ! public CompositeTagScanner(String filter, String [] nameOfTagToMatch, String [] tagEnders) { ! this(filter,nameOfTagToMatch,tagEnders,true); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! boolean allowSelfChildren) { ! this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); ! } ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,endTagEnders, allowSelfChildren, false); *************** *** 158,229 **** * within quotes. */ ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren, boolean balance_quotes) { ! super(filter); ! this.nameOfTagToMatch = nameOfTagToMatch; ! this.allowSelfChildren = allowSelfChildren; this.balance_quotes = balance_quotes; ! this.tagEnderSet = new HashSet(); ! for (int i=0;i<tagEnders.length;i++) ! tagEnderSet.add(tagEnders[i]); ! this.endTagEnderSet = new HashSet(); ! for (int i=0;i<endTagEnders.length;i++) ! endTagEnderSet.add(endTagEnders[i]); ! } ! public Tag scan(Tag tag, String url, NodeReader reader,String currLine) throws ParserException { ! CompositeTagScannerHelper helper = ! new CompositeTagScannerHelper(this,tag,url,reader,currLine,balance_quotes); ! return helper.scan(); ! } ! /** ! 
* Override this method if you wish to create any data structures or do anything ! * before the start of the scan. This is just after a tag has triggered the scanner ! * but before the scanner begins its processing. ! */ ! public void beforeScanningStarts() { ! } ! ! /** ! * This method is called everytime a child to the composite is found. It is useful when we ! * need to store special children seperately. Though, all children are collected anyway into a node list. ! */ ! public void childNodeEncountered(Node node) { ! } ! /** ! * You must override this method to create the tag of your choice upon successful parsing. Data required ! * for construction of your tag can be found within tagData and compositeTagData ! */ ! public abstract Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException; ! public final boolean isTagToBeEndedFor(Tag tag) { ! boolean isEndTag = tag instanceof EndTag; ! String tagName = tag.getTagName(); ! if ( ! ( isEndTag && endTagEnderSet.contains(tagName)) || ! (!isEndTag && tagEnderSet.contains(tagName)) ! ) ! return true; else return false; ! } ! public final boolean isAllowSelfChildren() { ! return allowSelfChildren; ! } ! /** ! * Override this method to implement scanner logic that determines if the current scanner is ! * to be allowed. This is useful when there are rules which dont allow recursive tags of the same ! * type. @see BulletScanner ! * @return boolean true/false ! */ ! public boolean shouldCreateEndTagAndExit() { ! return false; ! } } --- 158,229 ---- * within quotes. */ ! public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, ! String [] endTagEnders, ! boolean allowSelfChildren, boolean balance_quotes) { ! super(filter); ! this.nameOfTagToMatch = nameOfTagToMatch; ! this.allowSelfChildren = allowSelfChildren; this.balance_quotes = balance_quotes; ! this.tagEnderSet = new HashSet(); ! for (int i=0;i<tagEnders.length;i++) ! 
tagEnderSet.add(tagEnders[i]); ! this.endTagEnderSet = new HashSet(); ! for (int i=0;i<endTagEnders.length;i++) ! endTagEnderSet.add(endTagEnders[i]); ! } ! public Tag scan(Tag tag, String url, NodeReader reader,String currLine) throws ParserException { ! CompositeTagScannerHelper helper = ! new CompositeTagScannerHelper(this,tag,url,reader,currLine,balance_quotes); ! return helper.scan(); ! } ! /** ! * Override this method if you wish to create any data structures or do anything ! * before the start of the scan. This is just after a tag has triggered the scanner ! * but before the scanner begins its processing. ! */ ! public void beforeScanningStarts() { ! } ! ! /** ! * This method is called everytime a child to the composite is found. It is useful when we ! * need to store special children seperately. Though, all children are collected anyway into a node list. ! */ ! public void childNodeEncountered(Node node) { ! } ! /** ! * You must override this method to create the tag of your choice upon successful parsing. Data required ! * for construction of your tag can be found within tagData and compositeTagData ! */ ! public abstract Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException; ! public final boolean isTagToBeEndedFor(Tag tag) { ! boolean isEndTag = tag instanceof EndTag; ! String tagName = tag.getTagName(); ! if ( ! ( isEndTag && endTagEnderSet.contains(tagName)) || ! (!isEndTag && tagEnderSet.contains(tagName)) ! ) ! return true; else return false; ! } ! public final boolean isAllowSelfChildren() { ! return allowSelfChildren; ! } ! /** ! * Override this method to implement scanner logic that determines if the current scanner is ! * to be allowed. This is useful when there are rules which dont allow recursive tags of the same ! * type. @see BulletScanner ! * @return boolean true/false ! */ ! public boolean shouldCreateEndTagAndExit() { ! return false; ! 
} } Index: DivScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DivScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** DivScanner.java 24 Aug 2003 21:59:42 -0000 1.26 --- DivScanner.java 3 Sep 2003 23:36:19 -0000 1.27 *************** *** 35,57 **** public class DivScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"DIV"}; ! ! public DivScanner() { ! this(""); ! } ! public DivScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Div(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } } --- 35,57 ---- public class DivScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"DIV"}; ! ! public DivScanner() { ! this(""); ! } ! public DivScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Div(tagData,compositeTagData); ! } ! public String[] getID() { ! return MATCH_STRING; ! } } Index: DoctypeScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DoctypeScanner.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** DoctypeScanner.java 24 Aug 2003 21:59:42 -0000 1.23 --- DoctypeScanner.java 3 Sep 2003 23:36:19 -0000 1.24 *************** *** 41,65 **** public class DoctypeScanner extends TagScanner { ! public DoctypeScanner() { ! super(); ! } ! public DoctypeScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "!DOCTYPE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! 
String tagContents = tag.getText(); ! tagContents=tagContents.substring(9,tagContents.length()); ! tagData.setTagContents(tagContents); ! return new DoctypeTag(tagData); ! } } --- 41,65 ---- public class DoctypeScanner extends TagScanner { ! public DoctypeScanner() { ! super(); ! } ! public DoctypeScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "!DOCTYPE"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tag.getText(); ! tagContents=tagContents.substring(9,tagContents.length()); ! tagData.setTagContents(tagContents); ! return new DoctypeTag(tagData); ! } } Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.41 retrieving revision 1.42 diff -C2 -d -r1.41 -r1.42 *** FormScanner.java 24 Aug 2003 21:59:42 -0000 1.41 --- FormScanner.java 3 Sep 2003 23:36:19 -0000 1.42 *************** *** 50,77 **** public class FormScanner extends CompositeTagScanner { ! private static final String [] MATCH_ID = { "FORM" }; ! public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; ! private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY" ! }; ! private Stack stack = new Stack(); ! /** ! * HTMLFormScanner constructor comment. ! */ ! public FormScanner(Parser parser) { ! this("", parser); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FormScanner(String filter, Parser parser) ! { ! super(filter,MATCH_ID,formTagEnders,false); ! parser.addScanner(new InputTagScanner("-i")); ! parser.addScanner(new TextareaTagScanner("-t",stack)); ! 
parser.addScanner(new SelectTagScanner("-select", stack)); ! parser.addScanner(new OptionTagScanner("-option",stack)); ! } ! /** * Extract the location of the image, given the string to be parsed, and the url --- 50,77 ---- public class FormScanner extends CompositeTagScanner { ! private static final String [] MATCH_ID = { "FORM" }; ! public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; ! private boolean linkScannerAlreadyOpen=false; ! private static final String [] formTagEnders = {"HTML","BODY" ! }; ! private Stack stack = new Stack(); ! /** ! * HTMLFormScanner constructor comment. ! */ ! public FormScanner(Parser parser) { ! this("", parser); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FormScanner(String filter, Parser parser) ! { ! super(filter,MATCH_ID,formTagEnders,false); ! parser.addScanner(new InputTagScanner("-i")); ! parser.addScanner(new TextareaTagScanner("-t",stack)); ! parser.addScanner(new SelectTagScanner("-select", stack)); ! parser.addScanner(new OptionTagScanner("-option",stack)); ! } ! /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 80,179 **** * @param url URL of web page being parsed. */ ! public String extractFormLocn(Tag tag,String url) throws ParserException ! { ! try { ! String formURL= tag.getAttribute("ACTION"); ! if (formURL==null) return ""; else ! return (new LinkProcessor()).extract(formURL, url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg= tag.getText(); else msg=""; ! throw new ParserException("HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "+msg+", url = "+url,e); ! } ! } ! public String extractFormName(Tag tag) ! { ! return tag.getAttribute("NAME"); ! } ! public String extractFormMethod(Tag tag) ! { ! String method = tag.getAttribute("METHOD"); ! 
if (method==null) method = FormTag.GET; ! return method.toUpperCase(); ! } ! /** ! * Scan the tag and extract the information related to the <IMG> tag. The url of the ! * initiating scan has to be provided in case relative links are found. The initial ! * url is then prepended to it to give an absolute link. ! * The NodeReader is provided in order to do a lookahead operation. We assume that ! * the identification has already been performed using the evaluate() method. ! * @param tag HTML Tag to be scanned for identification ! * @param url The initiating url of the scan (Where the html page lies) ! * @param reader The reader object responsible for reading the html page ! * @param currentLine The current line (automatically provided by Tag) ! */ ! // public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException ! // { ! // if (linkScannerAlreadyOpen) { ! // String newLine = insertEndTagBeforeNode(tag, currentLine); ! // reader.changeLine(newLine); ! // return new EndTag( ! // new TagData( ! // tag.elementBegin(), ! // tag.elementBegin()+3, ! // "A", ! // currentLine ! // ) ! // ); ! // } ! // return super.scan(tag,url,reader,currentLine); ! // } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! return MATCH_ID; ! } ! public boolean evaluate(String s, TagScanner previousOpenScanner) { ! if (previousOpenScanner instanceof LinkScanner) { ! linkScannerAlreadyOpen = true; ! StringBuffer msg= new StringBuffer(); ! msg.append("<"); ! msg.append(s); ! msg.append(">"); ! msg.append(PREVIOUS_DIRTY_LINK_MESSAGE); ! feedback.warning(msg.toString()); ! // This is dirty HTML. Assume the current tag is ! // not a new link tag - but an end tag. This is actually a really wild bug - ! // Internet Explorer actually parses such tags. ! // So - we shall then proceed to fool the scanner into sending an endtag of type </A> ! // For this - set the dirty flag to true and return ! } ! else ! 
linkScannerAlreadyOpen = false; ! return super.evaluate(s, previousOpenScanner); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) ! compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) stack.pop (); ! return new FormTag(tagData, compositeTagData); ! } ! public void beforeScanningStarts() { ! stack.push(this); ! } } --- 80,179 ---- * @param url URL of web page being parsed. */ ! public String extractFormLocn(Tag tag,String url) throws ParserException ! { ! try { ! String formURL= tag.getAttribute("ACTION"); ! if (formURL==null) return ""; else ! return (new LinkProcessor()).extract(formURL, url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg= tag.getText(); else msg=""; ! throw new ParserException("HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "+msg+", url = "+url,e); ! } ! } ! public String extractFormName(Tag tag) ! { ! return tag.getAttribute("NAME"); ! } ! public String extractFormMethod(Tag tag) ! { ! String method = tag.getAttribute("METHOD"); ! if (method==null) method = FormTag.GET; ! return method.toUpperCase(); ! } ! /** ! * Scan the tag and extract the information related to the <IMG> tag. The url of the ! * initiating scan has to be provided in case relative links are found. The initial ! * url is then prepended to it to give an absolute link. ! * The NodeReader is provided in order to do a lookahead operation. We assume that ! * the identification has already been performed using the evaluate() method. ! * @param tag HTML Tag to be scanned for identification ! * @param url The initiating url of the scan (Where the html page lies) ! * @param reader The reader object responsible for reading the html page ! 
* @param currentLine The current line (automatically provided by Tag) ! */ ! // public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException ! // { ! // if (linkScannerAlreadyOpen) { ! // String newLine = insertEndTagBeforeNode(tag, currentLine); ! // reader.changeLine(newLine); ! // return new EndTag( ! // new TagData( ! // tag.elementBegin(), ! // tag.elementBegin()+3, ! // "A", ! // currentLine ! // ) ! // ); ! // } ! // return super.scan(tag,url,reader,currentLine); ! // } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! return MATCH_ID; ! } ! public boolean evaluate(String s, TagScanner previousOpenScanner) { ! if (previousOpenScanner instanceof LinkScanner) { ! linkScannerAlreadyOpen = true; ! StringBuffer msg= new StringBuffer(); ! msg.append("<"); ! msg.append(s); ! msg.append(">"); ! msg.append(PREVIOUS_DIRTY_LINK_MESSAGE); ! feedback.warning(msg.toString()); ! // This is dirty HTML. Assume the current tag is ! // not a new link tag - but an end tag. This is actually a really wild bug - ! // Internet Explorer actually parses such tags. ! // So - we shall then proceed to fool the scanner into sending an endtag of type </A> ! // For this - set the dirty flag to true and return ! } ! else ! linkScannerAlreadyOpen = false; ! return super.evaluate(s, previousOpenScanner); ! } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) ! throws ParserException { ! String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) ! compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) stack.pop (); ! return new FormTag(tagData, compositeTagData); ! } ! public void beforeScanningStarts() { ! stack.push(this); ! 
} } Index: FrameScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameScanner.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** FrameScanner.java 24 Aug 2003 21:59:42 -0000 1.25 --- FrameScanner.java 3 Sep 2003 23:36:19 -0000 1.26 *************** *** 49,66 **** public class FrameScanner extends TagScanner { ! /** ! * Overriding the default constructor ! */ ! public FrameScanner() ! { ! super(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FrameScanner(String filter) ! { ! super(filter); ! } /** * Extract the location of the image, given the string to be parsed, and the url --- 49,66 ---- public class FrameScanner extends TagScanner { ! /** ! * Overriding the default constructor ! */ ! public FrameScanner() ! { ! super(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public FrameScanner(String filter) ! { ! super(filter); ! } /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 69,108 **** * @param url URL of web page being parsed. */ ! public String extractFrameLocn(Tag tag,String url) throws ParserException ! { ! try { ! Hashtable table = tag.getAttributes(); ! String relativeFrame = (String)table.get("SRC"); ! if (relativeFrame==null) return ""; else ! return (new LinkProcessor()).extract(relativeFrame,url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg = tag.getText(); else msg = "null"; ! throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); ! } ! } ! ! public String extractFrameName(Tag tag,String url) { ! return tag.getAttribute("NAME"); ! } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "FRAME"; ! return ids; ! } ! 
protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! String frameUrl = extractFrameLocn(tag,url); ! String frameName = extractFrameName(tag,url); ! ! return new FrameTag(tagData,frameUrl,frameName); ! } } --- 69,108 ---- * @param url URL of web page being parsed. */ ! public String extractFrameLocn(Tag tag,String url) throws ParserException ! { ! try { ! Hashtable table = tag.getAttributes(); ! String relativeFrame = (String)table.get("SRC"); ! if (relativeFrame==null) return ""; else ! return (new LinkProcessor()).extract(relativeFrame,url); ! } ! catch (Exception e) { ! String msg; ! if (tag!=null) msg = tag.getText(); else msg = "null"; ! throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); ! } ! } ! ! public String extractFrameName(Tag tag,String url) { ! return tag.getAttribute("NAME"); ! } ! /** ! * @see org.htmlparser.scanners.TagScanner#getID() ! */ ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "FRAME"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! String frameUrl = extractFrameLocn(tag,url); ! String frameName = extractFrameName(tag,url); ! ! return new FrameTag(tagData,frameUrl,frameName); ! } } Index: FrameSetScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameSetScanner.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** FrameSetScanner.java 24 Aug 2003 21:59:42 -0000 1.24 --- FrameSetScanner.java 3 Sep 2003 23:36:19 -0000 1.25 *************** *** 47,71 **** public class FrameSetScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"FRAMESET"}; ! ! public FrameSetScanner() ! { ! super(MATCH_NAME); ! } ! public FrameSetScanner(String filter) ! { ! super(filter,MATCH_NAME); ! } ! 
public String [] getID() { ! return MATCH_NAME; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new FrameSetTag(tagData,compositeTagData); ! } } --- 47,71 ---- public class FrameSetScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"FRAMESET"}; ! ! public FrameSetScanner() ! { ! super(MATCH_NAME); ! } ! public FrameSetScanner(String filter) ! { ! super(filter,MATCH_NAME); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new FrameSetTag(tagData,compositeTagData); ! } } Index: HeadScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HeadScanner.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** HeadScanner.java 24 Aug 2003 21:59:42 -0000 1.11 --- HeadScanner.java 3 Sep 2003 23:36:19 -0000 1.12 *************** *** 1,61 **** ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! // For any questions or suggestions, you can write to me at : ! 
// Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by Dhaval Udani ! // dha...@or... ! ! package org.htmlparser.scanners; ! ! import org.htmlparser.tags.HeadTag; ! import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! ! public class HeadScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"HEAD"}; ! private static final String ENDERS [] = {"BODY"}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! ! public HeadScanner() { ! this(""); ! } ! ! public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new HeadTag(tagData,compositeTagData); ! } ! } --- 1,61 ---- ! // HTMLParser Library v1_4_20030824 - A java-based parser for HTML ! // Copyright (C) Dec 31, 2000 Somik Raha ! // ! // This library is free software; you can redistribute it and/or ! // modify it under the terms of the GNU Lesser General Public ! // License as published by the Free Software Foundation; either ! // version 2.1 of the License, or (at your option) any later version. ! // ! // This library is distributed in the hope that it will be useful, ! // but WITHOUT ANY WARRANTY; without even the implied warranty of ! // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ! // Lesser General Public License for more details. ! // ! // You should have received a copy of the GNU Lesser General Public ! // License along with this library; if not, write to the Free Software ! // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // ! 
// For any questions or suggestions, you can write to me at : ! // Email :so...@in... ! // ! // Postal Address : ! // Somik Raha ! // Extreme Programmer & Coach ! // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, ! // CA 94708, USA ! // Website : http://www.industriallogic.com ! // ! // This class was contributed by Dhaval Udani ! // dha...@or... ! ! package org.htmlparser.scanners; ! ! import org.htmlparser.tags.HeadTag; ! import org.htmlparser.tags.Tag; ! import org.htmlparser.tags.data.CompositeTagData; ! import org.htmlparser.tags.data.TagData; ! ! public class HeadScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"HEAD"}; ! private static final String ENDERS [] = {"BODY"}; ! private static final String END_TAG_ENDERS [] = {"HTML"}; ! ! public HeadScanner() { ! this(""); ! } ! ! public HeadScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); ! } ! ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new HeadTag(tagData,compositeTagData); ! } ! } Index: HtmlScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HtmlScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** HtmlScanner.java 24 Aug 2003 21:59:42 -0000 1.26 --- HtmlScanner.java 3 Sep 2003 23:36:19 -0000 1.27 *************** *** 35,57 **** public class HtmlScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"HTML"}; ! ! public HtmlScanner() { ! this(""); ! } ! public HtmlScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Html(tagData,compositeTagData); ! 
} } --- 35,57 ---- public class HtmlScanner extends CompositeTagScanner { ! private static String MATCH_STRING [] = {"HTML"}; ! ! public HtmlScanner() { ! this(""); ! } ! public HtmlScanner(String filter) { ! super(filter, MATCH_STRING); ! } ! public String[] getID() { ! return MATCH_STRING; ! } ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new Html(tagData,compositeTagData); ! } } Index: ImageScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ImageScanner.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** ImageScanner.java 24 Aug 2003 21:59:42 -0000 1.24 --- ImageScanner.java 3 Sep 2003 23:36:20 -0000 1.25 *************** *** 48,70 **** public class ImageScanner extends TagScanner { ! public static final String IMAGE_SCANNER_ID = "IMG"; ! private Hashtable table; ! private LinkProcessor processor; ! /** ! * Overriding the default constructor ! */ ! public ImageScanner() ! { ! super(); ! processor = new LinkProcessor(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public ImageScanner(String filter,LinkProcessor processor) ! { ! super(filter); ! this.processor = processor; ! } /** * Extract the location of the image, given the string to be parsed, and the url --- 48,70 ---- public class ImageScanner extends TagScanner { ! public static final String IMAGE_SCANNER_ID = "IMG"; ! private Hashtable table; ! private LinkProcessor processor; ! /** ! * Overriding the default constructor ! */ ! public ImageScanner() ! { ! super(); ! processor = new LinkProcessor(); ! } ! /** ! * Overriding the constructor to accept the filter ! */ ! public ImageScanner(String filter,LinkProcessor processor) ! { ! super(filter); ! this.processor = processor; ! 
} /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 73,117 **** * @param url URL of web page being parsed. */ ! public String extractImageLocn(Tag tag,String url) throws ParserException ! { ! String relativeLink=null; ! try { ! table = tag.getAttributes(); ! relativeLink = (String)table.get("SRC"); ! if (relativeLink!=null) { ! relativeLink = ParserUtils.removeChars(relativeLink,'\n'); ! relativeLink = ParserUtils.removeChars(relativeLink,'\r'); ! } ! if (relativeLink==null || relativeLink.length()==0) { ! // try fix ! String tagText = tag.getText().toUpperCase(); ! int indexSrc = tagText.indexOf("SRC"); ! if (indexSrc != -1) { ! // There is a missing equals. ! tag.setText(tag.getText().substring(0,indexSrc+3)+"="+tag.getText().substring(indexSrc+3,tag.getText().length())); ! table = tag.redoParseAttributes(); ! relativeLink = (String) table.get("SRC"); ! ! } ! } ! if (relativeLink==null) return ""; else ! return processor.extract(relativeLink,url); ! } ! catch (Exception e) { ! throw new ParserException("HTMLImageScanner.extractImageLocn() : Error in extracting image location, relativeLink = "+relativeLink+", url = "+url,e); ! } ! } ! ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = IMAGE_SCANNER_ID; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String link = extractImageLocn(tag,url); ! return new ImageTag(tagData, link); ! } } --- 73,117 ---- * @param url URL of web page being parsed. */ ! public String extractImageLocn(Tag tag,String url) throws ParserException ! { ! String relativeLink=null; ! try { ! table = tag.getAttributes(); ! relativeLink = (String)table.get("SRC"); ! if (relativeLink!=null) { ! relativeLink = ParserUtils.removeChars(relativeLink,'\n'); ! relativeLink = ParserUtils.removeChars(relativeLink,'\r'); ! } ! if (relativeLink==null || relativeLink.length()==0) { ! // try fix ! 
String tagText = tag.getText().toUpperCase(); ! int indexSrc = tagText.indexOf("SRC"); ! if (indexSrc != -1) { ! // There is a missing equals. ! tag.setText(tag.getText().substring(0,indexSrc+3)+"="+tag.getText().substring(indexSrc+3,tag.getText().length())); ! table = tag.redoParseAttributes(); ! relativeLink = (String) table.get("SRC"); ! ! } ! } ! if (relativeLink==null) return ""; else ! return processor.extract(relativeLink,url); ! } ! catch (Exception e) { ! throw new ParserException("HTMLImageScanner.extractImageLocn() : Error in extracting image location, relativeLink = "+relativeLink+", url = "+url,e); ! } ! } ! ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = IMAGE_SCANNER_ID; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String link = extractImageLocn(tag,url); ! return new ImageTag(tagData, link); ! } } Index: InputTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/InputTagScanner.java,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** InputTagScanner.java 24 Aug 2003 21:59:42 -0000 1.22 --- InputTagScanner.java 3 Sep 2003 23:36:20 -0000 1.23 *************** *** 36,59 **** public class InputTagScanner extends TagScanner { ! public InputTagScanner() ! { ! super(); ! } ! ! public InputTagScanner(String filter) ! { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "INPUT"; ! return ids; ! } ! ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! return new InputTag(tagData); ! } } --- 36,59 ---- public class InputTagScanner extends TagScanner { ! public InputTagScanner() ! { ! super(); ! } ! ! public InputTagScanner(String filter) ! { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[1]; ! ids[0] = "INPUT"; ! return ids; ! } ! ! 
protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! return new InputTag(tagData); ! } } Index: JspScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/JspScanner.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** JspScanner.java 24 Aug 2003 21:59:42 -0000 1.23 --- JspScanner.java 3 Sep 2003 23:36:20 -0000 1.24 *************** *** 40,65 **** public class JspScanner extends TagScanner { ! public JspScanner() { ! super(); ! } ! public JspScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[3]; ! ids[0] = "%"; ! ids[1] = "%="; ! ids[2] = "%@"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tagData.getTagContents(); ! tagData.setTagContents(tagContents.substring(1,tagContents.length()-1)); ! return new JspTag(tagData); ! } } --- 40,65 ---- public class JspScanner extends TagScanner { ! public JspScanner() { ! super(); ! } ! public JspScanner(String filter) { ! super(filter); ! } ! public String [] getID() { ! String [] ids = new String[3]; ! ids[0] = "%"; ! ids[1] = "%="; ! ids[2] = "%@"; ! return ids; ! } ! protected Tag createTag(TagData tagData, Tag tag, String url) ! throws ParserException { ! String tagContents = tagData.getTagContents(); ! tagData.setTagContents(tagContents.substring(1,tagContents.length()-1)); ! return new JspTag(tagData); ! 
} } Index: LabelScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LabelScanner.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** LabelScanner.java 24 Aug 2003 21:59:42 -0000 1.29 --- LabelScanner.java 3 Sep 2003 23:36:20 -0000 1.30 *************** *** 38,59 **** public class LabelScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"LABEL"}; ! public LabelScanner() { ! super(MATCH_NAME,new String [] {},false); ! } ! ! public LabelScanner(String filter) { ! super(filter,MATCH_NAME,new String [] {},false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new LabelTag(tagData,compositeTagData); ! } } --- 38,59 ---- public class LabelScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"LABEL"}; ! public LabelScanner() { ! super(MATCH_NAME,new String [] {},false); ! } ! ! public LabelScanner(String filter) { ! super(filter,MATCH_NAME,new String [] {},false); ! } ! public String [] getID() { ! return MATCH_NAME; ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) { ! return new LabelTag(tagData,compositeTagData); ! } } Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** LinkScanner.java 24 Aug 2003 21:59:42 -0000 1.49 --- LinkScanner.java 3 Sep 2003 23:36:20 -0000 1.50 *************** *** 51,121 **** public class LinkScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"A"}; ! public static final String LINK_SCANNER_ID = "A"; ! 
public static final String DIRTY_TAG_MESSAGE=" is a dirty link tag - the tag was not closed. \nWe encountered an open tag, before the previous end tag was found.\nCorrecting this.."; ! private LinkProcessor processor; ! private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! ! /** ! * Overriding the default constructor ! */ ! public LinkScanner() { ! this(""); ! } ! ! /** ! * Overriding the constructor to accept the filter ! */ ! public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); ! processor = new LinkProcessor(); ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) throws ParserException { ! String link = extractLink(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! int mailto = link.indexOf("mailto"); ! boolean mailLink=false; ! if (mailto==0) ! { ! // yes it is ! mailto = link.indexOf(":"); ! link = link.substring(mailto+1); ! mailLink = true; ! } ! int javascript = link.indexOf("javascript:"); ! boolean javascriptLink = false; ! if (javascript == 0) { ! link = link.substring(11); // this magic number is "javascript:".length() ! javascriptLink = true; ! } ! String accessKey = getAccessKey(compositeTagData.getStartTag()); ! String myLinkText = compositeTagData.getChildren().toString(); ! ! LinkTag linkTag = new LinkTag( ! tagData, ! compositeTagData, ! new LinkData( ! link, ! myLinkText, ! accessKey, ! mailLink, ! javascriptLink ! ) ! ); ! linkTag.setThisScanner(this); ! return linkTag; ! } ! ! /** ! * Template Method, used to decide if this scanner can handle the Link tag type. If ! * the evaluation returns true, the calling side makes a call to scan(). ! * @param s The complete text contents of the Tag. ! * @param previousOpenScanner Indicates any previous scanner which hasnt completed, before the current ! 
* scan has begun, and hence allows us to write scanners that can work with dirty html ! */ public boolean evaluate (String s, TagScanner previousOpenScanner) { --- 51,121 ---- public class LinkScanner extends CompositeTagScanner { ! private static final String MATCH_NAME [] = {"A"}; ! public static final String LINK_SCANNER_ID = "A"; ! public static final String DIRTY_TAG_MESSAGE=" is a dirty link tag - the tag was not closed. \nWe encountered an open tag, before the previous end tag was found.\nCorrecting this.."; ! private LinkProcessor processor; ! private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! ! /** ! * Overriding the default constructor ! */ ! public LinkScanner() { ! this(""); ! } ! ! /** ! * Overriding the constructor to accept the filter ! */ ! public LinkScanner(String filter) { ! super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); ! processor = new LinkProcessor(); ! } ! ! public Tag createTag( ! TagData tagData, ! CompositeTagData compositeTagData) throws ParserException { ! String link = extractLink(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! int mailto = link.indexOf("mailto"); ! boolean mailLink=false; ! if (mailto==0) ! { ! // yes it is ! mailto = link.indexOf(":"); ! link = link.substring(mailto+1); ! mailLink = true; ! } ! int javascript = link.indexOf("javascript:"); ! boolean javascriptLink = false; ! if (javascript == 0) { ! link = link.substring(11); // this magic number is "javascript:".length() ! javascriptLink = true; ! } ! String accessKey = getAccessKey(compositeTagData.getStartTag()); ! String myLinkText = compositeTagData.getChildren().toString(); ! ! LinkTag linkTag = new LinkTag( ! tagData, ! compositeTagData, ! new LinkData( ! link, ! myLinkText, ! accessKey, ! mailLink, ! javascriptLink ! ) ! ); ! linkTag.setThisScanner(this); ! return linkTag; ! } ! ! /** ... 
[truncated message content] |
From: <der...@us...> - 2003-09-03 23:36:54
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/codeMetrics In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tests/codeMetrics Modified Files: LineCounter.java Log Message: Change tabs to spaces in all source files. Index: LineCounter.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/codeMetrics/LineCounter.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** LineCounter.java 24 Aug 2003 21:59:43 -0000 1.4 --- LineCounter.java 3 Sep 2003 23:36:20 -0000 1.5 *************** *** 36,100 **** public class LineCounter { ! ! public int count(File file) { ! System.out.println("Handling "+file.getName()); ! int count = 0; ! // Get all files in current directory ! if (file.isDirectory()) { ! // Get the listing in this directory ! count = recurseDirectory(file, count); ! } else { ! // It is a file ! count = countLinesIn(file); ! } ! return count; ! } ! /** ! * Counts code excluding comments and blank lines in the given file ! * @param file ! * @return int ! */ ! public int countLinesIn(File file) { ! int count = 0; ! System.out.println("Counting "+file.getName()); ! try { ! BufferedReader reader = new BufferedReader(new FileReader(file.getAbsolutePath())); ! String line = null; ! do { ! line = reader.readLine(); ! if (line!=null && ! line.indexOf("*")==-1 && ! line.indexOf("//")==-1 && ! line.length()>0 ! ) count++; ! } ! while (line!=null); ! } ! catch (Exception e) { ! e.printStackTrace(); ! } ! return count; ! } ! public int recurseDirectory(File file, int count) { ! File [] files = file.listFiles(new FileFilter() { ! public boolean accept(File file) { ! if (file.getName().indexOf(".java")!=-1 || file.isDirectory()) { ! return true; ! } else { ! return false; ! } ! } ! }); ! for (int i=0;i<files.length;i++) { ! count += count(files[i]); ! } ! return count; ! } ! ! public static void main(String [] args) { ! 
LineCounter lc = new LineCounter(); ! System.out.println("Line Count = "+lc.count(new File(args[0]))); ! } } --- 36,100 ---- public class LineCounter { ! ! public int count(File file) { ! System.out.println("Handling "+file.getName()); ! int count = 0; ! // Get all files in current directory ! if (file.isDirectory()) { ! // Get the listing in this directory ! count = recurseDirectory(file, count); ! } else { ! // It is a file ! count = countLinesIn(file); ! } ! return count; ! } ! /** ! * Counts code excluding comments and blank lines in the given file ! * @param file ! * @return int ! */ ! public int countLinesIn(File file) { ! int count = 0; ! System.out.println("Counting "+file.getName()); ! try { ! BufferedReader reader = new BufferedReader(new FileReader(file.getAbsolutePath())); ! String line = null; ! do { ! line = reader.readLine(); ! if (line!=null && ! line.indexOf("*")==-1 && ! line.indexOf("//")==-1 && ! line.length()>0 ! ) count++; ! } ! while (line!=null); ! } ! catch (Exception e) { ! e.printStackTrace(); ! } ! return count; ! } ! public int recurseDirectory(File file, int count) { ! File [] files = file.listFiles(new FileFilter() { ! public boolean accept(File file) { ! if (file.getName().indexOf(".java")!=-1 || file.isDirectory()) { ! return true; ! } else { ! return false; ! } ! } ! }); ! for (int i=0;i<files.length;i++) { ! count += count(files[i]); ! } ! return count; ! } ! ! public static void main(String [] args) { ! LineCounter lc = new LineCounter(); ! System.out.println("Line Count = "+lc.count(new File(args[0]))); ! } } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper In directory sc8-pr-cvs1:/tmp/cvs-serv31228/parserHelper Modified Files: AttributeParser.java CompositeTagScannerHelper.java ParserHelper.java ScriptScannerHelper.java StringParser.java TagParser.java Log Message: Change tabs to spaces in all source files. Index: AttributeParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/AttributeParser.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** AttributeParser.java 24 Aug 2003 21:59:42 -0000 1.36 --- AttributeParser.java 3 Sep 2003 23:36:19 -0000 1.37 *************** *** 52,87 **** ! private Hashtable attributeTable; ! private String element; ! private String name; ! private String value; ! private String part; ! private String empty; ! private boolean equal; ! private StringTokenizer tokenizer; ! private boolean doubleQuote; ! private boolean singleQuote; ! private boolean ready; ! private String currentToken; ! private String tokenAccumulator; ! /** ! * Method to break the tag into pieces. ! * @param text All the text within the tag inside < and >. * @return A Hastable with elements containing the ! * pieces of the tag. The tag-name has the value field set to ! * the constant Tag.TAGNAME. In addition the tag-name is ! * stored into the Hashtable with the name Tag.TAGNAME ! * where the value is the name of the tag. ! * Tag parameters without value ! * has the value "". Parameters with value are represented ! * in the Hastable by a name/value pair. ! * As html is case insensitive but Hastable is not are all ! * names converted into UPPERCASE to the Hastable ! * E.g extract the href values from A-tag's and print them ! * <pre> ! * * Tag tag; ! * Hashtable h; ! * String tmp; * try { * NodeReader in = new NodeReader(new FileReader(path),2048); --- 52,87 ---- ! private Hashtable attributeTable; ! 
private String element; ! private String name; ! private String value; ! private String part; ! private String empty; ! private boolean equal; ! private StringTokenizer tokenizer; ! private boolean doubleQuote; ! private boolean singleQuote; ! private boolean ready; ! private String currentToken; ! private String tokenAccumulator; ! /** ! * Method to break the tag into pieces. ! * @param text All the text within the tag inside < and >. * @return A Hastable with elements containing the ! * pieces of the tag. The tag-name has the value field set to ! * the constant Tag.TAGNAME. In addition the tag-name is ! * stored into the Hashtable with the name Tag.TAGNAME ! * where the value is the name of the tag. ! * Tag parameters without value ! * has the value "". Parameters with value are represented ! * in the Hastable by a name/value pair. ! * As html is case insensitive but Hastable is not are all ! * names converted into UPPERCASE to the Hastable ! * E.g extract the href values from A-tag's and print them ! * <pre> ! * * Tag tag; ! * Hashtable h; ! * String tmp; * try { * NodeReader in = new NodeReader(new FileReader(path),2048); *************** *** 102,118 **** * ie.printStackTrace(); * } ! * </pre> ! * ! */ public Hashtable parseAttributes (String text) { ! attributeTable = new SpecialHashtable(); ! part = null; ! empty = null; name=null; value=null; element=null; ! equal = false; delim=DELIMETERS; ! tokenizer = new StringTokenizer(text,delim,true); while (true) { part=getNextPartUsing(delim); --- 102,118 ---- * ie.printStackTrace(); * } ! * </pre> ! * ! */ public Hashtable parseAttributes (String text) { ! attributeTable = new SpecialHashtable(); ! part = null; ! empty = null; name=null; value=null; element=null; ! equal = false; delim=DELIMETERS; ! tokenizer = new StringTokenizer(text,delim,true); while (true) { part=getNextPartUsing(delim); *************** *** 127,131 **** } else { ! 
processInvalidPart(); if (!tokenizer.hasMoreTokens ()) break; --- 127,131 ---- } else { ! processInvalidPart(); if (!tokenizer.hasMoreTokens ()) break; *************** *** 160,201 **** } ! private boolean isValid(String part) { ! return part != null && (0 < part.length ()); ! } ! private void process(String part) { ! if (name == null) { ! if (!part.substring(0,1).equals(" ")) { ! name = part; ! equal=true; ! } ! } ! else { ! if (equal){ ! if (part.equals("=")) { ! equal=false; ! delim=DELIMETERS_WITHOUT_EQUALS; value=Tag.NOTHING; ! } ! else { ! putDataIntoTable(attributeTable,name,Tag.NULLVALUE,false); ! name=part; ! value=null; ! } ! } ! if (!equal && !part.equals("=")) { ! value=part; ! putDataIntoTable(attributeTable,name,value,false); ! name=null; ! value=null; ! } ! } ! } private String getNextPartUsing(String delimiter) { ! tokenAccumulator = null; ! doubleQuote = false; ! singleQuote = false; ! ready = false; while (ready == false && tokenizer.hasMoreTokens()) { currentToken = tokenizer.nextToken(delimiter); --- 160,201 ---- } ! private boolean isValid(String part) { ! return part != null && (0 < part.length ()); ! } ! private void process(String part) { ! if (name == null) { ! if (!part.substring(0,1).equals(" ")) { ! name = part; ! equal=true; ! } ! } ! else { ! if (equal){ ! if (part.equals("=")) { ! equal=false; ! delim=DELIMETERS_WITHOUT_EQUALS; value=Tag.NOTHING; ! } ! else { ! putDataIntoTable(attributeTable,name,Tag.NULLVALUE,false); ! name=part; ! value=null; ! } ! } ! if (!equal && !part.equals("=")) { ! value=part; ! putDataIntoTable(attributeTable,name,value,false); ! name=null; ! value=null; ! } ! } ! } private String getNextPartUsing(String delimiter) { ! tokenAccumulator = null; ! doubleQuote = false; ! singleQuote = false; ! ready = false; while (ready == false && tokenizer.hasMoreTokens()) { currentToken = tokenizer.nextToken(delimiter); *************** *** 210,214 **** tokenAccumulator=""; } else { ! 
tokenAccumulator = currentToken; ready = isReadyWithNextPart(currentToken); } --- 210,214 ---- tokenAccumulator=""; } else { ! tokenAccumulator = currentToken; ready = isReadyWithNextPart(currentToken); } *************** *** 217,256 **** } ! private boolean isReadyWithNextPart(String currentToken) { ! boolean ready = false; ! if (isDelimeter(currentToken)) { ! if (currentToken.equals("=")){ ! ready=true; ! } ! } ! else { ! ready=true; ! } ! return ready; ! } ! private boolean isDelimeter(String token) { ! return delim.indexOf(tokenAccumulator)>=0; ! } ! ! private boolean isCurrentTokenSingleQuote() { ! return currentToken.charAt(0)==SINGLE_QUOTE; ! } ! private boolean isCurrentTokenDoubleQuote() { ! return currentToken.charAt(0)==DOUBLE_QUOTE; ! } ! private void combineTokensInsideSingleOrDoubleQuotes() { ! if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){ ! doubleQuote= false; ! ready=true; ! } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) { ! singleQuote=false; ! ready=true; ! }else { ! tokenAccumulator += currentToken; ! } ! } --- 217,256 ---- } ! private boolean isReadyWithNextPart(String currentToken) { ! boolean ready = false; ! if (isDelimeter(currentToken)) { ! if (currentToken.equals("=")){ ! ready=true; ! } ! } ! else { ! ready=true; ! } ! return ready; ! } ! private boolean isDelimeter(String token) { ! return delim.indexOf(tokenAccumulator)>=0; ! } ! ! private boolean isCurrentTokenSingleQuote() { ! return currentToken.charAt(0)==SINGLE_QUOTE; ! } ! private boolean isCurrentTokenDoubleQuote() { ! return currentToken.charAt(0)==DOUBLE_QUOTE; ! } ! private void combineTokensInsideSingleOrDoubleQuotes() { ! if (doubleQuote && currentToken.charAt(0)==DOUBLE_QUOTE){ ! doubleQuote= false; ! ready=true; ! } else if (singleQuote && currentToken.charAt(0)==SINGLE_QUOTE) { ! singleQuote=false; ! ready=true; ! }else { ! tokenAccumulator += currentToken; ! } ! 
} Index: CompositeTagScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** CompositeTagScannerHelper.java 24 Aug 2003 21:59:42 -0000 1.43 --- CompositeTagScannerHelper.java 3 Sep 2003 23:36:19 -0000 1.44 *************** *** 41,225 **** public class CompositeTagScannerHelper { ! private CompositeTagScanner scanner; ! private Tag tag; ! private String url; ! private NodeReader reader; ! private String currLine; ! private Tag endTag; ! private NodeList nodeList; ! private boolean endTagFound; ! private int startingLineNumber; ! private int endingLineNumber; ! private boolean balance_quotes; ! ! public CompositeTagScannerHelper( ! CompositeTagScanner scanner, ! Tag tag, ! String url, ! NodeReader reader, ! String currLine, boolean balance_quotes) { ! ! this.scanner = scanner; ! this.tag = tag; ! this.url = url; ! this.reader = reader; ! this.currLine = currLine; ! this.endTag = null; ! this.nodeList = new NodeList(); ! this.endTagFound = false; this.balance_quotes = balance_quotes; ! } ! public Tag scan() throws ParserException { ! this.startingLineNumber = reader.getLastLineNumber(); ! if (shouldCreateEndTagAndExit()) { ! return createEndTagAndRepositionReader(); ! } ! scanner.beforeScanningStarts(); ! Node currentNode = tag; ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) { ! do { ! currentNode = reader.readElement(balance_quotes); ! if (currentNode==null) continue; ! currLine = reader.getCurrentLine(); ! if (currentNode instanceof Tag) ! doForceCorrectionCheckOn((Tag)currentNode); ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) ! doChildAndEndTagCheckOn(currentNode); ! } ! while (currentNode!=null && !endTagFound); ! } ! if (endTag==null) { ! createCorrectionEndTagBefore(reader.getLastReadPosition()+1); ! } ! ! 
this.endingLineNumber = reader.getLastLineNumber(); ! return createTag(); ! } ! private boolean shouldCreateEndTagAndExit() { ! return scanner.shouldCreateEndTagAndExit(); ! } ! private Tag createEndTagAndRepositionReader() { ! createCorrectionEndTagBefore(tag.elementBegin()); ! reader.setPosInLine(tag.elementBegin()); ! reader.setDontReadNextLine(true); ! return endTag; ! } ! private void createCorrectionEndTagBefore(int pos) { ! String endTagName = tag.getTagName(); ! int endTagBegin = pos ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! ! private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { ! String endTagName = tag.getTagName(); ! int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! possibleEndTagCauser.setTagBegin(endTagEnd+1); ! reader.addNextParsedNode(possibleEndTagCauser); ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private Tag createTag() throws ParserException { ! CompositeTag newTag = ! (CompositeTag) ! scanner.createTag( ! new TagData( ! tag.elementBegin(), ! endTag.elementEnd(), ! startingLineNumber, ! endingLineNumber, ! tag.getText(), ! currLine, ! url, ! tag.isEmptyXmlTag() ! ), ! new CompositeTagData( ! tag,endTag,nodeList ! ) ! ); ! for (int i=0;i<newTag.getChildCount();i++) { ! Node child = newTag.childAt(i); ! child.setParent(newTag); ! } ! return newTag; ! } ! private void doChildAndEndTagCheckOn(Node currentNode) { ! if (currentNode instanceof EndTag) { ! EndTag possibleEndTag = (EndTag)currentNode; ! if (isExpectedEndTag(possibleEndTag)) { ! endTagFound = true; ! endTag = possibleEndTag; ! return; ! } ! } ! nodeList.add(currentNode); ! scanner.childNodeEncountered(currentNode); ! } ! private boolean isExpectedEndTag(EndTag possibleEndTag) { ! 
return possibleEndTag.getTagName().equals(tag.getTagName()); ! } ! private void doEmptyXmlTagCheckOn(Node currentNode) { ! if (currentNode instanceof Tag) { ! Tag possibleEndTag = (Tag)currentNode; ! if (isXmlEndTag(tag)) { ! endTag = possibleEndTag; ! endTagFound = true; ! } ! } ! } ! private void doForceCorrectionCheckOn(Tag possibleEndTagCauser) { ! if (isEndTagMissing(possibleEndTagCauser)) { ! createCorrectionEndTagBefore(possibleEndTagCauser); ! endTagFound = true; ! } ! } ! private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || ! isSelfChildTagRecievedIncorrectly(possibleEndTag); ! } ! private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag) { ! return ( ! !(possibleEndTag instanceof EndTag) && ! !scanner.isAllowSelfChildren() && ! possibleEndTag.getTagName().equals(tag.getTagName()) ! ); ! } ! ! public boolean isXmlEndTag(Tag tag) { ! String tagText = tag.getText(); ! int lastSlash = tagText.lastIndexOf("/"); ! return (lastSlash == tagText.length()-1 || tag.isEmptyXmlTag()) && tag.getText().indexOf("://")==-1; ! } } --- 41,225 ---- public class CompositeTagScannerHelper { ! private CompositeTagScanner scanner; ! private Tag tag; ! private String url; ! private NodeReader reader; ! private String currLine; ! private Tag endTag; ! private NodeList nodeList; ! private boolean endTagFound; ! private int startingLineNumber; ! private int endingLineNumber; ! private boolean balance_quotes; ! ! public CompositeTagScannerHelper( ! CompositeTagScanner scanner, ! Tag tag, ! String url, ! NodeReader reader, ! String currLine, boolean balance_quotes) { ! ! this.scanner = scanner; ! this.tag = tag; ! this.url = url; ! this.reader = reader; ! this.currLine = currLine; ! this.endTag = null; ! this.nodeList = new NodeList(); ! this.endTagFound = false; this.balance_quotes = balance_quotes; ! } ! public Tag scan() throws ParserException { ! this.startingLineNumber = reader.getLastLineNumber(); ! 
if (shouldCreateEndTagAndExit()) { ! return createEndTagAndRepositionReader(); ! } ! scanner.beforeScanningStarts(); ! Node currentNode = tag; ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) { ! do { ! currentNode = reader.readElement(balance_quotes); ! if (currentNode==null) continue; ! currLine = reader.getCurrentLine(); ! if (currentNode instanceof Tag) ! doForceCorrectionCheckOn((Tag)currentNode); ! ! doEmptyXmlTagCheckOn(currentNode); ! if (!endTagFound) ! doChildAndEndTagCheckOn(currentNode); ! } ! while (currentNode!=null && !endTagFound); ! } ! if (endTag==null) { ! createCorrectionEndTagBefore(reader.getLastReadPosition()+1); ! } ! ! this.endingLineNumber = reader.getLastLineNumber(); ! return createTag(); ! } ! private boolean shouldCreateEndTagAndExit() { ! return scanner.shouldCreateEndTagAndExit(); ! } ! private Tag createEndTagAndRepositionReader() { ! createCorrectionEndTagBefore(tag.elementBegin()); ! reader.setPosInLine(tag.elementBegin()); ! reader.setDontReadNextLine(true); ! return endTag; ! } ! private void createCorrectionEndTagBefore(int pos) { ! String endTagName = tag.getTagName(); ! int endTagBegin = pos ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! ! private void createCorrectionEndTagBefore(Tag possibleEndTagCauser) { ! String endTagName = tag.getTagName(); ! int endTagBegin = possibleEndTagCauser.elementBegin(); ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! possibleEndTagCauser.setTagBegin(endTagEnd+1); ! reader.addNextParsedNode(possibleEndTagCauser); ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private Tag createTag() throws ParserException { ! CompositeTag newTag = ! (CompositeTag) ! scanner.createTag( ! new TagData( ! tag.elementBegin(), ! endTag.elementEnd(), ! startingLineNumber, ! endingLineNumber, ! tag.getText(), ! currLine, ! 
url, ! tag.isEmptyXmlTag() ! ), ! new CompositeTagData( ! tag,endTag,nodeList ! ) ! ); ! for (int i=0;i<newTag.getChildCount();i++) { ! Node child = newTag.childAt(i); ! child.setParent(newTag); ! } ! return newTag; ! } ! private void doChildAndEndTagCheckOn(Node currentNode) { ! if (currentNode instanceof EndTag) { ! EndTag possibleEndTag = (EndTag)currentNode; ! if (isExpectedEndTag(possibleEndTag)) { ! endTagFound = true; ! endTag = possibleEndTag; ! return; ! } ! } ! nodeList.add(currentNode); ! scanner.childNodeEncountered(currentNode); ! } ! private boolean isExpectedEndTag(EndTag possibleEndTag) { ! return possibleEndTag.getTagName().equals(tag.getTagName()); ! } ! private void doEmptyXmlTagCheckOn(Node currentNode) { ! if (currentNode instanceof Tag) { ! Tag possibleEndTag = (Tag)currentNode; ! if (isXmlEndTag(tag)) { ! endTag = possibleEndTag; ! endTagFound = true; ! } ! } ! } ! private void doForceCorrectionCheckOn(Tag possibleEndTagCauser) { ! if (isEndTagMissing(possibleEndTagCauser)) { ! createCorrectionEndTagBefore(possibleEndTagCauser); ! endTagFound = true; ! } ! } ! private boolean isEndTagMissing(Tag possibleEndTag) { ! return ! scanner.isTagToBeEndedFor(possibleEndTag) || ! isSelfChildTagRecievedIncorrectly(possibleEndTag); ! } ! private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag) { ! return ( ! !(possibleEndTag instanceof EndTag) && ! !scanner.isAllowSelfChildren() && ! possibleEndTag.getTagName().equals(tag.getTagName()) ! ); ! } ! ! public boolean isXmlEndTag(Tag tag) { ! String tagText = tag.getText(); ! int lastSlash = tagText.lastIndexOf("/"); ! return (lastSlash == tagText.length()-1 || tag.isEmptyXmlTag()) && tag.getText().indexOf("://")==-1; ! 
} } Index: ParserHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ParserHelper.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** ParserHelper.java 24 Aug 2003 21:59:42 -0000 1.13 --- ParserHelper.java 3 Sep 2003 23:36:19 -0000 1.14 *************** *** 42,106 **** public class ParserHelper implements Serializable { ! public ParserHelper() { ! super(); ! } ! /** ! * Opens a connection using the given url. ! * @param url The url to open. ! * @param feedback The ibject to use for messages or <code>null</code>. ! * @exception ParserException if an i/o exception occurs accessing the url. ! */ ! public static URLConnection openConnection (URL url, ParserFeedback feedback) ! throws ! ParserException ! { ! URLConnection ret; ! ! try ! { ! ret = url.openConnection (); ! } ! catch (IOException ioe) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! ! return (ret); ! } ! /** ! * Opens a connection based on a given string. ! * The string is either a file, in which case <code>file://localhost</code> ! * is prepended to a canonical path derived from the string, or a url that ! * begins with one of the known protocol strings, i.e. <code>http://</code>. * Embedded spaces are silently converted to %20 sequences. ! * @param string The name of a file or a url. ! * @param feedback The object to use for messages or <code>null</code> for no feedback. ! * @exception ParserException if the string is not a valid url or file. ! */ ! public static URLConnection openConnection (String string, ParserFeedback feedback) ! throws ! ParserException ! { ! final String prefix = "file://localhost"; ! String resource; ! URL url; ! StringBuffer buffer; ! URLConnection ret; ! ! try ! 
{ ! url = new URL (LinkProcessor.fixSpaces (string)); ! ret = ParserHelper.openConnection (url, feedback); ! } ! catch (MalformedURLException murle) ! { // try it as a file ! try ! { File file = new File (string); resource = file.getCanonicalPath (); --- 42,106 ---- public class ParserHelper implements Serializable { ! public ParserHelper() { ! super(); ! } ! /** ! * Opens a connection using the given url. ! * @param url The url to open. ! * @param feedback The ibject to use for messages or <code>null</code>. ! * @exception ParserException if an i/o exception occurs accessing the url. ! */ ! public static URLConnection openConnection (URL url, ParserFeedback feedback) ! throws ! ParserException ! { ! URLConnection ret; ! ! try ! { ! ret = url.openConnection (); ! } ! catch (IOException ioe) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm (); ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! ! return (ret); ! } ! /** ! * Opens a connection based on a given string. ! * The string is either a file, in which case <code>file://localhost</code> ! * is prepended to a canonical path derived from the string, or a url that ! * begins with one of the known protocol strings, i.e. <code>http://</code>. * Embedded spaces are silently converted to %20 sequences. ! * @param string The name of a file or a url. ! * @param feedback The object to use for messages or <code>null</code> for no feedback. ! * @exception ParserException if the string is not a valid url or file. ! */ ! public static URLConnection openConnection (String string, ParserFeedback feedback) ! throws ! ParserException ! { ! final String prefix = "file://localhost"; ! String resource; ! URL url; ! StringBuffer buffer; ! URLConnection ret; ! ! try ! { ! url = new URL (LinkProcessor.fixSpaces (string)); ! ret = ParserHelper.openConnection (url, feedback); ! } ! 
catch (MalformedURLException murle) ! { // try it as a file ! try ! { File file = new File (string); resource = file.getCanonicalPath (); *************** *** 110,189 **** buffer.append ("/"); buffer.append (resource); ! url = new URL (LinkProcessor.fixSpaces (buffer.toString ())); ! ret = ParserHelper.openConnection (url, feedback); ! if (null != feedback) ! feedback.info (url.toExternalForm ()); ! } ! catch (MalformedURLException murle2) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, murle2); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! catch (IOException ioe) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; } ! } ! ! return (ret); ! } ! /** ! * Lookup a character set name. ! * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> ! * This uses reflection so the code will still run under prior JDK's but ! * in that case the default is always returned. ! * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. ! */ ! public static String findCharset (String name, String _default) ! { ! String ret; ! try ! { ! Class cls; ! java.lang.reflect.Method method; ! Object object; ! cls = Class.forName ("java.nio.charset.Charset"); ! method = cls.getMethod ("forName", new Class[] { String.class }); ! object = method.invoke (null, new Object[] { name }); ! method = cls.getMethod ("name", new Class[] { }); ! object = method.invoke (object, new Object[] { }); ! ret = (String)object; ! } ! catch (ClassNotFoundException cnfe) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (NoSuchMethodException nsme) ! { ! // for reflection exceptions, assume the name is correct ! 
ret = name; ! } ! catch (IllegalAccessException ia) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (java.lang.reflect.InvocationTargetException ita) ! { ! // java.nio.charset.IllegalCharsetNameException ! // and java.nio.charset.UnsupportedCharsetException ! // return the default ! ret = _default; ! } ! return (ret); ! } } --- 110,189 ---- buffer.append ("/"); buffer.append (resource); ! url = new URL (LinkProcessor.fixSpaces (buffer.toString ())); ! ret = ParserHelper.openConnection (url, feedback); ! if (null != feedback) ! feedback.info (url.toExternalForm ()); ! } ! catch (MalformedURLException murle2) { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, murle2); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; } ! catch (IOException ioe) ! { ! String msg = "HTMLParser.openConnection() : Error in opening a connection to " + string; ! ParserException ex = new ParserException (msg, ioe); ! if (null != feedback) ! feedback.error (msg, ex); ! throw ex; ! } ! } ! ! return (ret); ! } ! /** ! * Lookup a character set name. ! * <em>Vacuous for JVM's without <code>java.nio.charset</code>.</em> ! * This uses reflection so the code will still run under prior JDK's but ! * in that case the default is always returned. ! * @param name The name to look up. One of the aliases for a character set. ! * @param _default The name to return if the lookup fails. ! */ ! public static String findCharset (String name, String _default) ! { ! String ret; ! try ! { ! Class cls; ! java.lang.reflect.Method method; ! Object object; ! cls = Class.forName ("java.nio.charset.Charset"); ! method = cls.getMethod ("forName", new Class[] { String.class }); ! object = method.invoke (null, new Object[] { name }); ! method = cls.getMethod ("name", new Class[] { }); ! object = method.invoke (object, new Object[] { }); ! ret = (String)object; ! } ! 
catch (ClassNotFoundException cnfe) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (NoSuchMethodException nsme) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (IllegalAccessException ia) ! { ! // for reflection exceptions, assume the name is correct ! ret = name; ! } ! catch (java.lang.reflect.InvocationTargetException ita) ! { ! // java.nio.charset.IllegalCharsetNameException ! // and java.nio.charset.UnsupportedCharsetException ! // return the default ! ret = _default; ! } ! return (ret); ! } } Index: ScriptScannerHelper.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/ScriptScannerHelper.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** ScriptScannerHelper.java 24 Aug 2003 21:59:42 -0000 1.8 --- ScriptScannerHelper.java 3 Sep 2003 23:36:19 -0000 1.9 *************** *** 38,214 **** public class ScriptScannerHelper { ! private int endTagLoc; ! private Tag endTag; ! private Tag startTag; ! private int startingPos; ! private boolean sameLine; ! private boolean endTagFound; ! private NodeReader reader; ! private StringBuffer scriptContents; ! private ScriptScanner scriptScanner; ! private Tag tag; ! private String url; ! private String currLine; ! ! public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) { ! this.reader = nodeReader; ! this.scriptScanner = scriptScanner; ! this.tag = tag; ! this.url = url; ! this.currLine = currLine; ! } ! public Tag scan() throws ParserException { ! int startLine = reader.getLastLineNumber(); ! startTag = tag; ! extractScriptTagFrom(currLine); ! if (isScriptEndTagNotFound()) { ! createScriptEndTag(tag, currLine); ! } ! return createScriptTagUsing(url, currLine, startLine); ! } ! ! private Tag createScriptTagUsing(String url, String currLine, int startLine) { ! 
return scriptScanner.createTag( ! new TagData( ! startTag.elementBegin(), ! endTag.elementEnd(), ! startLine, ! reader.getLastLineNumber(), ! startTag.getText(), ! currLine, ! url, ! false ! ), new CompositeTagData( ! startTag,endTag,createChildrenNodeList() ! ) ! ); ! } ! private NodeList createChildrenNodeList() { ! NodeList childrenNodeList = new NodeList(); ! childrenNodeList.add( ! new StringNode( ! scriptContents, ! startTag.elementEnd(), ! endTag.elementBegin()-1 ! ) ! ); ! return childrenNodeList; ! } ! private void createScriptEndTag(Tag tag, String currLine) { ! // If end tag doesn't exist, create one ! String endTagName = tag.getTagName(); ! int endTagBegin = reader.getLastReadPosition()+1 ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private boolean isScriptEndTagNotFound() { ! return endTag == null; ! } ! private void extractScriptTagFrom(String currLine) throws ParserException { ! String line = null; ! scriptContents = new StringBuffer(); ! endTagFound = false; ! ! endTag = null; ! line = currLine; ! sameLine = true; ! startingPos = startTag.elementEnd(); ! do { ! doExtractionOfScriptContentsFrom(line); ! if (!endTagFound) { ! line = reader.getNextLine(); ! startingPos = 0; ! } ! if (sameLine) ! sameLine = false; ! } ! while (line!=null && !endTagFound); ! } ! private void doExtractionOfScriptContentsFrom(String line) throws ParserException { ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(),startingPos); ! findStartingAndEndingLocations(line); ! ! if (endTagLoc!=-1) { ! extractEndTagFrom(line); ! } else { ! continueParsing(line); ! } ! } ! private void continueParsing(String line) { ! if (sameLine) ! scriptContents.append( ! line.substring( ! startTag.elementEnd()+1 ! ) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line); ! } ! } ! 
private void extractEndTagFrom(String line) throws ParserException { ! endTagFound = true; ! endTag = (EndTag)EndTag.find(line,endTagLoc); ! if (sameLine) ! scriptContents.append( ! getCodeBetweenStartAndEndTags( ! line, ! startTag, ! endTagLoc) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line.substring(0,endTagLoc)); ! } ! ! reader.setPosInLine(endTag.elementEnd()); ! } ! private void findStartingAndEndingLocations(String line) { ! while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) { ! startingPos = endTagLoc+scriptScanner.getEndTag().length(); ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(), startingPos); ! } ! } ! public String getCodeBetweenStartAndEndTags( ! String line, ! Tag startTag, ! int endTagLoc) throws ParserException { ! try { ! ! return line.substring( ! startTag.elementEnd()+1, ! endTagLoc ! ); ! } ! catch (Exception e) { ! StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n"); ! msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n"); ! msg.append("substring ends at: "+(endTagLoc)); ! throw new ParserException(msg.toString(),e); ! } ! } ! private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) { ! if (endTagLoc+scriptScanner.getEndTag().length() > line.length()-1) return false; ! char charAfterSuspectedEndTag = ! line.charAt(endTagLoc+scriptScanner.getEndTag().length()); ! return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\''; ! } } --- 38,214 ---- public class ScriptScannerHelper { ! private int endTagLoc; ! private Tag endTag; ! private Tag startTag; ! private int startingPos; ! private boolean sameLine; ! private boolean endTagFound; ! private NodeReader reader; ! private StringBuffer scriptContents; ! private ScriptScanner scriptScanner; ! private Tag tag; ! private String url; ! private String currLine; ! ! 
public ScriptScannerHelper(Tag tag, String url, NodeReader nodeReader, String currLine, ScriptScanner scriptScanner) { ! this.reader = nodeReader; ! this.scriptScanner = scriptScanner; ! this.tag = tag; ! this.url = url; ! this.currLine = currLine; ! } ! public Tag scan() throws ParserException { ! int startLine = reader.getLastLineNumber(); ! startTag = tag; ! extractScriptTagFrom(currLine); ! if (isScriptEndTagNotFound()) { ! createScriptEndTag(tag, currLine); ! } ! return createScriptTagUsing(url, currLine, startLine); ! } ! ! private Tag createScriptTagUsing(String url, String currLine, int startLine) { ! return scriptScanner.createTag( ! new TagData( ! startTag.elementBegin(), ! endTag.elementEnd(), ! startLine, ! reader.getLastLineNumber(), ! startTag.getText(), ! currLine, ! url, ! false ! ), new CompositeTagData( ! startTag,endTag,createChildrenNodeList() ! ) ! ); ! } ! private NodeList createChildrenNodeList() { ! NodeList childrenNodeList = new NodeList(); ! childrenNodeList.add( ! new StringNode( ! scriptContents, ! startTag.elementEnd(), ! endTag.elementBegin()-1 ! ) ! ); ! return childrenNodeList; ! } ! private void createScriptEndTag(Tag tag, String currLine) { ! // If end tag doesn't exist, create one ! String endTagName = tag.getTagName(); ! int endTagBegin = reader.getLastReadPosition()+1 ; ! int endTagEnd = endTagBegin + endTagName.length() + 2; ! endTag = new EndTag( ! new TagData( ! endTagBegin, ! endTagEnd, ! endTagName, ! currLine ! ) ! ); ! } ! private boolean isScriptEndTagNotFound() { ! return endTag == null; ! } ! private void extractScriptTagFrom(String currLine) throws ParserException { ! String line = null; ! scriptContents = new StringBuffer(); ! endTagFound = false; ! ! endTag = null; ! line = currLine; ! sameLine = true; ! startingPos = startTag.elementEnd(); ! do { ! doExtractionOfScriptContentsFrom(line); ! if (!endTagFound) { ! line = reader.getNextLine(); ! startingPos = 0; ! } ! if (sameLine) ! sameLine = false; ! } ! 
while (line!=null && !endTagFound); ! } ! private void doExtractionOfScriptContentsFrom(String line) throws ParserException { ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(),startingPos); ! findStartingAndEndingLocations(line); ! ! if (endTagLoc!=-1) { ! extractEndTagFrom(line); ! } else { ! continueParsing(line); ! } ! } ! private void continueParsing(String line) { ! if (sameLine) ! scriptContents.append( ! line.substring( ! startTag.elementEnd()+1 ! ) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line); ! } ! } ! private void extractEndTagFrom(String line) throws ParserException { ! endTagFound = true; ! endTag = (EndTag)EndTag.find(line,endTagLoc); ! if (sameLine) ! scriptContents.append( ! getCodeBetweenStartAndEndTags( ! line, ! startTag, ! endTagLoc) ! ); ! else { ! scriptContents.append(Parser.getLineSeparator()); ! scriptContents.append(line.substring(0,endTagLoc)); ! } ! ! reader.setPosInLine(endTag.elementEnd()); ! } ! private void findStartingAndEndingLocations(String line) { ! while (endTagLoc>0 && isThisEndTagLocationFalseMatch(line, endTagLoc)) { ! startingPos = endTagLoc+scriptScanner.getEndTag().length(); ! endTagLoc = line.toUpperCase().indexOf(scriptScanner.getEndTag(), startingPos); ! } ! } ! public String getCodeBetweenStartAndEndTags( ! String line, ! Tag startTag, ! int endTagLoc) throws ParserException { ! try { ! ! return line.substring( ! startTag.elementEnd()+1, ! endTagLoc ! ); ! } ! catch (Exception e) { ! StringBuffer msg = new StringBuffer("Error in getCodeBetweenStartAndEndTags():\n"); ! msg.append("substring starts at: "+(startTag.elementEnd()+1)).append("\n"); ! msg.append("substring ends at: "+(endTagLoc)); ! throw new ParserException(msg.toString(),e); ! } ! } ! private boolean isThisEndTagLocationFalseMatch(String line, int endTagLoc) { ! if (endTagLoc+scriptScanner.getEndTag().length() > line.length()-1) return false; ! char charAfterSuspectedEndTag = ! 
line.charAt(endTagLoc+scriptScanner.getEndTag().length()); ! return charAfterSuspectedEndTag=='"' || charAfterSuspectedEndTag=='\''; ! } } Index: StringParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/StringParser.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** StringParser.java 24 Aug 2003 21:59:42 -0000 1.36 --- StringParser.java 3 Sep 2003 23:36:19 -0000 1.37 *************** *** 34,41 **** public class StringParser { ! private final static int BEFORE_PARSE_BEGINS_STATE=0; ! private final static int PARSE_HAS_BEGUN_STATE=1; ! private final static int PARSE_COMPLETED_STATE=2; ! private final static int PARSE_IGNORE_STATE=3; /** --- 34,41 ---- public class StringParser { ! private final static int BEFORE_PARSE_BEGINS_STATE=0; ! private final static int PARSE_HAS_BEGUN_STATE=1; ! private final static int PARSE_COMPLETED_STATE=2; ! private final static int PARSE_IGNORE_STATE=3; /** *************** *** 65,89 **** } ! /** ! * Locate the StringNode within the input string, by parsing from the given position ! * @param reader HTML reader to be provided so as to allow reading of next line ! * @param input Input String ! * @param position Position to start parsing from ! * @param balance_quotes If <code>true</code> enter ignoring state on * encountering quotes. ! */ ! public Node find(NodeReader reader,String input,int position, boolean balance_quotes) ! { ! StringBuffer textBuffer = new StringBuffer(); ! int state = BEFORE_PARSE_BEGINS_STATE; ! int textBegin=position; ! int textEnd=position; ! int inputLen = input.length(); ! char ch; char ignore_ender = '\"'; ! for (int i=position;(i<inputLen && state!=PARSE_COMPLETED_STATE);i++) ! { ! ch = input.charAt(i); ! if (ch=='<' && state!=PARSE_IGNORE_STATE) { if (beginTag (input, i)) --- 65,89 ---- } ! /** ! 
* Locate the StringNode within the input string, by parsing from the given position ! * @param reader HTML reader to be provided so as to allow reading of next line ! * @param input Input String ! * @param position Position to start parsing from ! * @param balance_quotes If <code>true</code> enter ignoring state on * encountering quotes. ! */ ! public Node find(NodeReader reader,String input,int position, boolean balance_quotes) ! { ! StringBuffer textBuffer = new StringBuffer(); ! int state = BEFORE_PARSE_BEGINS_STATE; ! int textBegin=position; ! int textEnd=position; ! int inputLen = input.length(); ! char ch; char ignore_ender = '\"'; ! for (int i=position;(i<inputLen && state!=PARSE_COMPLETED_STATE);i++) ! { ! ch = input.charAt(i); ! if (ch=='<' && state!=PARSE_IGNORE_STATE) { if (beginTag (input, i)) *************** *** 92,142 **** textEnd=i-1; } ! } ! if (balance_quotes && (ch=='\'' || ch=='"')) { ! if (state==PARSE_IGNORE_STATE) { if (ch == ignore_ender) state=PARSE_HAS_BEGUN_STATE; } ! else { ignore_ender = ch; state = PARSE_IGNORE_STATE; ! } ! } ! if (state==BEFORE_PARSE_BEGINS_STATE) ! { ! state=PARSE_HAS_BEGUN_STATE; ! } ! if (state==PARSE_HAS_BEGUN_STATE || state==PARSE_IGNORE_STATE) ! { ! textBuffer.append(input.charAt(i)); ! } ! // Patch by Cedric Rosa ! if (state==BEFORE_PARSE_BEGINS_STATE && i==inputLen-1) ! state=PARSE_HAS_BEGUN_STATE; ! if (state==PARSE_HAS_BEGUN_STATE && i==inputLen-1) ! { ! do { ! input = reader.getNextLine(); ! if (input!=null && input.length()==0) ! textBuffer.append(Parser.getLineSeparator()); ! } ! while (input!=null && input.length()==0); ! ! if (input==null) { ! textEnd=i; ! state =PARSE_COMPLETED_STATE; ! ! } else { ! textBuffer.append(Parser.getLineSeparator()); ! inputLen = input.length(); ! i=-1; ! } ! } ! } ! return reader.getParser().getStringNodeFactory().createStringNode(textBuffer, textBegin, textEnd); ! } } --- 92,142 ---- textEnd=i-1; } ! } ! if (balance_quotes && (ch=='\'' || ch=='"')) { ! 
if (state==PARSE_IGNORE_STATE) { if (ch == ignore_ender) state=PARSE_HAS_BEGUN_STATE; } ! else { ignore_ender = ch; state = PARSE_IGNORE_STATE; ! } ! } ! if (state==BEFORE_PARSE_BEGINS_STATE) ! { ! state=PARSE_HAS_BEGUN_STATE; ! } ! if (state==PARSE_HAS_BEGUN_STATE || state==PARSE_IGNORE_STATE) ! { ! textBuffer.append(input.charAt(i)); ! } ! // Patch by Cedric Rosa ! if (state==BEFORE_PARSE_BEGINS_STATE && i==inputLen-1) ! state=PARSE_HAS_BEGUN_STATE; ! if (state==PARSE_HAS_BEGUN_STATE && i==inputLen-1) ! { ! do { ! input = reader.getNextLine(); ! if (input!=null && input.length()==0) ! textBuffer.append(Parser.getLineSeparator()); ! } ! while (input!=null && input.length()==0); ! ! if (input==null) { ! textEnd=i; ! state =PARSE_COMPLETED_STATE; ! ! } else { ! textBuffer.append(Parser.getLineSeparator()); ! inputLen = input.length(); ! i=-1; ! } ! } ! } ! return reader.getParser().getStringNodeFactory().createStringNode(textBuffer, textBegin, textEnd); ! } } Index: TagParser.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserHelper/TagParser.java,v retrieving revision 1.39 retrieving revision 1.40 diff -C2 -d -r1.39 -r1.40 *** TagParser.java 24 Aug 2003 21:59:42 -0000 1.39 --- TagParser.java 3 Sep 2003 23:36:19 -0000 1.40 *************** *** 38,244 **** public class TagParser { ! public final static int TAG_BEFORE_PARSING_STATE=1; ! public final static int TAG_BEGIN_PARSING_STATE=1<<2; ! public final static int TAG_FINISHED_PARSING_STATE=1<<3; ! public final static int TAG_ILLEGAL_STATE=1<<4; ! public final static int TAG_IGNORE_DATA_STATE=1<<5; ! public final static int TAG_IGNORE_BEGIN_TAG_STATE=1<<6; ! public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE=1<<7; ! ! public final static String ENCOUNTERED_QUERY_MESSAGE = "TagParser : Encountered > after a query. Accepting without correction and continuing parsing"; ! ! private ParserFeedback feedback; ! 
public TagParser(ParserFeedback feedback) { ! this.feedback = feedback; ! } ! public Tag find(NodeReader reader,String input,int position) { ! int state = TAG_BEFORE_PARSING_STATE; ! int i=position; ! char ch; ! char[] ignorechar = new char[1]; // holds the character we're looking for when in TAG_IGNORE_DATA_STATE ! Tag tag = new Tag(new TagData(position, 0, reader.getLastLineNumber(), 0, "", input, "", false)); ! Bool encounteredQuery = new Bool(false); ! while (i<tag.getTagLine().length() && ! state!=TAG_FINISHED_PARSING_STATE && ! state!=TAG_ILLEGAL_STATE ! ) ! { ! ch = tag.getTagLine().charAt(i); ! state = automataInput(encounteredQuery, i, state, ch, tag, i, ignorechar); ! i = incrementCounter(i, reader, state, tag); ! } ! if (state==TAG_FINISHED_PARSING_STATE) { ! String tagLine = tag.getTagLine(); ! if (i>1 && tagLine.charAt(i-2)=='/') { ! tag.setEmptyXmlTag(true); ! String tagContents = tag.getText(); ! tag.setText(tagContents.substring(0,tagContents.length()-1)); ! } ! return tag; ! } else ! return null; ! } ! private int automataInput(Bool encounteredQuery, int i, int state,char ch, Tag tag, int pos, char[] ignorechar) { ! state = checkIllegalState(i, state, ch, tag); ! state = checkFinishedState(encounteredQuery, i, state, ch, tag, pos); ! state = toggleIgnoringState(state, ch, ignorechar); ! if (state==TAG_BEFORE_PARSING_STATE && ch!='<') { ! state= TAG_ILLEGAL_STATE; ! } ! if (state==TAG_IGNORE_DATA_STATE && ch=='<') { ! // If the next tag char is is close tag, then ! // this is legal, we should continue ! if (!isWellFormedTag(tag,pos)) ! state = TAG_IGNORE_BEGIN_TAG_STATE; ! } ! if (state==TAG_IGNORE_BEGIN_TAG_STATE && ch=='>') { ! state = TAG_IGNORE_DATA_STATE; ! } ! checkIfAppendable(encounteredQuery, state, ch, tag); ! state = checkBeginParsingState(i, state, ch, tag); ! return state; ! } ! private int checkBeginParsingState(int i, int state, char ch, Tag tag) { ! if (ch=='<' && ! (state==TAG_BEFORE_PARSING_STATE || ! state==TAG_ILLEGAL_STATE)) ! 
{ ! // Transition from State 0 to State 1 - Record data till > is encountered ! tag.setTagBegin(i); ! state = TAG_BEGIN_PARSING_STATE; ! } ! return state; ! } ! private boolean isWellFormedTag(Tag tag, int pos) { ! String inputLine = tag.getTagLine(); ! int closeTagPos = inputLine.indexOf('>',pos+1); ! int openTagPos = inputLine.indexOf('<',pos+1); ! return openTagPos > closeTagPos || (openTagPos ==-1 && closeTagPos!=-1); ! } ! ! private int checkFinishedState(Bool encounteredQuery, int i, int state, char ch, Tag tag, int pos) { ! if (ch=='>') ! { ! if (state==TAG_BEGIN_PARSING_STATE) ! { ! state = TAG_FINISHED_PARSING_STATE; ! tag.setTagEnd(i); ! } ! else ! if (state==TAG_IGNORE_DATA_STATE) { ! if (encounteredQuery.getBoolean()) { ! encounteredQuery.setBoolean(false); ! feedback.info(ENCOUNTERED_QUERY_MESSAGE); ! return state; ! } ! // Now, either this is a valid > input, and should be ignored, ! // or it is a mistake in the html, in which case we need to correct it *sigh* ! if (isWellFormedTag(tag,pos)) return state; ! ! state = TAG_FINISHED_PARSING_STATE; ! tag.setTagEnd(i); ! // Do Correction ! // Correct the tag - assuming its grouped into name value pairs ! // Remove all inverted commas. ! correctTag(tag); ! ! StringBuffer msg = new StringBuffer(); ! msg.append("HTMLTagParser : Encountered > inside inverted commas in line \n"); ! msg.append(tag.getTagLine()); ! msg.append(", location "); ! msg.append(i); ! msg.append("\n"); ! for (int j=0;j<i;j++) msg.append(' '); ! msg.append('^'); ! msg.append("\nAutomatically corrected."); ! feedback.warning(msg.toString()); ! } ! } else ! if (ch=='<' && ! state==TAG_BEGIN_PARSING_STATE && ! tag.getText().charAt(0)!='%' ! ) { ! state = TAG_FINISHED_PARSING_STATE; ! tag.setTagEnd(i-1);i--; ! } ! return state; ! } ! private void checkIfAppendable(Bool encounteredQuery,int state, char ch, Tag tag) { ! if (state==TAG_IGNORE_DATA_STATE || ! state==TAG_BEGIN_PARSING_STATE || ! state==TAG_IGNORE_BEGIN_TAG_STATE) { ! 
if (ch=='?') ! encounteredQuery.setBoolean(true); ! tag.append(ch); ! } ! } ! private int checkIllegalState(int i, int state, char ch, Tag tag) { ! if (ch=='/' && i>0 && tag.getTagLine().charAt(i-1)=='<' && ! state!=TAG_IGNORE_DATA_STATE && ! state!=TAG_IGNORE_BEGIN_TAG_STATE) ! { ! state = TAG_ILLEGAL_STATE; ! } ! return state; ! } ! ! public void correctTag(Tag tag) { ! String tempText = tag.getText(); ! StringBuffer absorbedText = new StringBuffer(); ! char c; ! for (int j=0;j<tempText.length();j++) { ! c = tempText.charAt(j); ! if (c!='"') ! absorbedText.append(c); ! } ! // Go into the next stage. ! StringBuffer result = insertInvertedCommasCorrectly(absorbedText); ! tag.setText(result.toString()); ! } ! public StringBuffer insertInvertedCommasCorrectly(StringBuffer absorbedText) { ! StringBuffer result = new StringBuffer(); ! StringTokenizer tok = new StringTokenizer(absorbedText.toString(),"=",false); ! String token; ! token= (String)tok.nextToken(); ! result.append(token+"="); ! for (;tok.hasMoreTokens();) { ! token= (String)tok.nextToken(); ! token = pruneSpaces(token); ! result.append('"'); ! int lastIndex = token.lastIndexOf(' '); ! if (lastIndex!=-1 && tok.hasMoreTokens()) { ! result.append(token.substring(0,lastIndex)); ! result.append('"'); ! result.append(token.substring(lastIndex,token.length())); ! } else result.append(token+'"'); ! if (tok.hasMoreTokens()) result.append("="); ! } ! return result; ! } ! public static String pruneSpaces(String token) { ! int firstSpace; ! int lastSpace; ! firstSpace = token.indexOf(' '); ! while (firstSpace==0) { ! token = token.substring(1,token.length()); ! firstSpace = token.indexOf(' '); ! } ! lastSpace = token.lastIndexOf(' '); ! while (lastSpace==token.length()-1) { ! token = token.substring(0,token.length()-1); ! lastSpace = token.lastIndexOf(' '); ! } ! return token; ! } /** --- 38,244 ---- public class TagParser { ! public final static int TAG_BEFORE_PARSING_STATE=1; ! 
public final static int TAG_BEGIN_PARSING_STATE=1<<2; ! public final static int TAG_FINISHED_PARSING_STATE=1<<3; ! public final static int TAG_ILLEGAL_STATE=1<<4; ! public final static int TAG_IGNORE_DATA_STATE=1<<5; ! public final static int TAG_IGNORE_BEGIN_TAG_STATE=1<<6; ! public final static int TAG_IGNORE_CHAR_SINGLE_QUOTE=1<<7; ! ! public final static String ENCOUNTERED_QUERY_MESSAGE = "TagParser : Encountered > after a query. Accepting without correction and continuing parsing"; ! ! private ParserFeedback feedback; ! public TagParser(ParserFeedback feedback) { ! this.feedback = feedback; ! } ! public Tag find(NodeReader reader,String input,int position) { ! int state = TAG_BEFORE_PARSING_STATE; ! int i=position; ! char ch; ! char[] ignorechar = new char[1]; // holds the character we're looking for when in TAG_IGNORE_DATA_STATE ! Tag tag = new Tag(new TagData(position, 0, reader.getLastLineNumber(), 0, "", input, "", false)); ! Bool encounteredQuery = new Bool(false); ! while (i<tag.getTagLine().length() && ! state!=TAG_FINISHED_PARSING_STATE && ! state!=TAG_ILLEGAL_STATE ! ) ! { ! ch = tag.getTagLine().charAt(i); ! state = automataInput(encounteredQuery, i, state, ch, tag, i, ignorechar); ! i = incrementCounter(i, reader, state, tag); ! } ! if (state==TAG_FINISHED_PARSING_STATE) { ! String tagLine = tag.getTagLine(); ! if (i>1 && tagLine.charAt(i-2)=='/') { ! tag.setEmptyXmlTag(true); ! String tagContents = tag.getText(); ! tag.setText(tagContents.substring(0,tagContents.length()-1)); ! } ! return tag; ! } else ! return null; ! } ! private int automataInput(Bool encounteredQuery, int i, int state,char ch, Tag tag, int pos, char[] ignorechar) { ! state = checkIllegalState(i, state, ch, tag); ! state = checkFinishedState(encounteredQuery, i, state, ch, tag, pos); ! state = toggleIgnoringState(state, ch, ignorechar); ! if (state==TAG_BEFORE_PARSING_STATE && ch!='<') { ! state= TAG_ILLEGAL_STATE; ! } ! if (state==TAG_IGNORE_DATA_STATE && ch=='<') { ! 
// If the next tag char is is close tag, then ! // this is legal, we should continue ! if (!isWellFormedTag(tag,pos)) ! state = TAG_IGNORE_BEGIN_TAG_STATE; ! } ! if (state==TAG_IGNORE_BEGIN_TAG_STATE && ch=='>') { ! state = TAG_IGNORE_DATA_STATE; ! } ! checkIfAppendable(encounteredQuery, state, ch, tag); ! state = checkBeginParsingState(i, state, ch, tag); ! return state; ! } ! private int checkBeginParsingState(int i, int state, char ch, Tag tag) { ! if (ch=='<' && ! (state==TAG_BEFORE_PARSING_STATE || ! state==TAG_ILLEGAL_STATE)) ! { ! // Transition from State 0 to State 1 - Record data till > i... [truncated message content] |
From: <der...@us...> - 2003-09-03 23:36:53
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data In directory sc8-pr-cvs1:/tmp/cvs-serv31228/tags/data Modified Files: CompositeTagData.java FormData.java LinkData.java TagData.java Log Message: Change tabs to spaces in all source files. Index: CompositeTagData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/CompositeTagData.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** CompositeTagData.java 24 Aug 2003 21:59:42 -0000 1.29 --- CompositeTagData.java 3 Sep 2003 23:36:20 -0000 1.30 *************** *** 34,69 **** public class CompositeTagData { ! private Tag startTag; ! private Tag endTag; ! private NodeList children; ! ! public CompositeTagData( ! Tag startTag, Tag endTag, NodeList children) { ! this.startTag = startTag; ! this.endTag = endTag; ! this.children = new NodeList(); ! if (children!=null) ! for (SimpleNodeIterator i = children.elements();i.hasMoreNodes();) { ! this.children.add(i.nextNode()); ! } ! } ! public NodeList getChildren() { ! return children; ! } ! public Tag getEndTag() { ! return endTag; ! } ! public Tag getStartTag() { ! return startTag; ! } ! ! public String toString() { ! StringBuffer childrenString = new StringBuffer(); ! for (int i=0;i<children.size();i++) ! childrenString.append(children.elementAt(i).toPlainTextString()); ! return childrenString.toString(); ! } } --- 34,69 ---- public class CompositeTagData { ! private Tag startTag; ! private Tag endTag; ! private NodeList children; ! ! public CompositeTagData( ! Tag startTag, Tag endTag, NodeList children) { ! this.startTag = startTag; ! this.endTag = endTag; ! this.children = new NodeList(); ! if (children!=null) ! for (SimpleNodeIterator i = children.elements();i.hasMoreNodes();) { ! this.children.add(i.nextNode()); ! } ! } ! public NodeList getChildren() { ! return children; ! } ! public Tag getEndTag() { ! return endTag; ! } ! 
public Tag getStartTag() { ! return startTag; ! } ! ! public String toString() { ! StringBuffer childrenString = new StringBuffer(); ! for (int i=0;i<children.size();i++) ! childrenString.append(children.elementAt(i).toPlainTextString()); ! return childrenString.toString(); ! } } Index: FormData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/FormData.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** FormData.java 24 Aug 2003 21:59:42 -0000 1.24 --- FormData.java 3 Sep 2003 23:36:20 -0000 1.25 *************** *** 32,69 **** public class FormData { ! private String formURL; ! private String formName; ! private String formMethod; ! private Vector formInputVector; ! private Vector textAreaVector; ! ! public FormData(String formURL, String formName, String formMethod, ! Vector formInputVector, Vector textAreaVector) { ! this.formURL = formURL; ! this.formName = formName; ! this.formMethod = formMethod; ! this.formInputVector = formInputVector; ! this.textAreaVector = textAreaVector; ! } ! public Vector getFormInputVector() { ! return formInputVector; ! } ! public String getFormMethod() { ! return formMethod; ! } ! public String getFormName() { ! return formName; ! } ! public String getFormURL() { ! return formURL; ! } ! public Vector getTextAreaVector() { ! return textAreaVector; ! } } --- 32,69 ---- public class FormData { ! private String formURL; ! private String formName; ! private String formMethod; ! private Vector formInputVector; ! private Vector textAreaVector; ! ! public FormData(String formURL, String formName, String formMethod, ! Vector formInputVector, Vector textAreaVector) { ! this.formURL = formURL; ! this.formName = formName; ! this.formMethod = formMethod; ! this.formInputVector = formInputVector; ! this.textAreaVector = textAreaVector; ! } ! public Vector getFormInputVector() { ! return formInputVector; ! } ! 
public String getFormMethod() { ! return formMethod; ! } ! public String getFormName() { ! return formName; ! } ! public String getFormURL() { ! return formURL; ! } ! public Vector getTextAreaVector() { ! return textAreaVector; ! } } Index: LinkData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/LinkData.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** LinkData.java 24 Aug 2003 21:59:43 -0000 1.26 --- LinkData.java 3 Sep 2003 23:36:20 -0000 1.27 *************** *** 30,70 **** public class LinkData { ! private String link; ! private String linkText; ! private String accessKey; ! private boolean mailLink; ! private boolean javascriptLink; ! ! public LinkData(String link,String linkText,String accessKey,boolean mailLink, ! boolean javascriptLink) { ! this.link = link; ! this.linkText = linkText; ! this.accessKey = accessKey; ! this.mailLink = mailLink; ! this.javascriptLink = javascriptLink; ! } ! public String getAccessKey() { ! return accessKey; ! } ! public boolean isJavascriptLink() { ! return javascriptLink; ! } ! public String getLink() { ! return link; ! } ! public String getLinkText() { ! return linkText; ! } ! public boolean isMailLink() { ! return mailLink; ! } ! public String toString() { ! return ""; ! } } --- 30,70 ---- public class LinkData { ! private String link; ! private String linkText; ! private String accessKey; ! private boolean mailLink; ! private boolean javascriptLink; ! ! public LinkData(String link,String linkText,String accessKey,boolean mailLink, ! boolean javascriptLink) { ! this.link = link; ! this.linkText = linkText; ! this.accessKey = accessKey; ! this.mailLink = mailLink; ! this.javascriptLink = javascriptLink; ! } ! public String getAccessKey() { ! return accessKey; ! } ! public boolean isJavascriptLink() { ! return javascriptLink; ! } ! public String getLink() { ! return link; ! } ! 
public String getLinkText() { ! return linkText; ! } ! public boolean isMailLink() { ! return mailLink; ! } ! public String toString() { ! return ""; ! } } Index: TagData.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/data/TagData.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** TagData.java 24 Aug 2003 21:59:43 -0000 1.27 --- TagData.java 3 Sep 2003 23:36:20 -0000 1.28 *************** *** 30,110 **** public class TagData { ! private int tagBegin; ! private int tagEnd; ! private int startLine; ! private int endLine; ! private String tagContents; ! private String tagLine; ! private String urlBeingParsed; ! private boolean isXmlEndTag; ! ! public TagData(int tagBegin, int tagEnd, String tagContents,String tagLine) { ! this(tagBegin, tagEnd, 0, 0, tagContents, tagLine, "", false); ! } ! ! public TagData(int tagBegin, int tagEnd, String tagContents,String tagLine, String urlBeingParsed) { ! this(tagBegin, tagEnd, 0, 0, tagContents, tagLine, urlBeingParsed, false); ! } ! ! public TagData(int tagBegin, int tagEnd, int startLine, int endLine, String tagContents,String tagLine, String urlBeingParsed, boolean isXmlEndTag) { ! this.tagBegin = tagBegin; ! this.tagEnd = tagEnd; ! this.startLine = startLine; ! this.endLine = endLine; ! this.tagContents = tagContents; ! this.tagLine = tagLine; ! this.urlBeingParsed = urlBeingParsed; ! this.isXmlEndTag = isXmlEndTag; ! } ! ! public int getTagBegin() { ! return tagBegin; ! } ! public String getTagContents() { ! return tagContents; ! } ! public int getTagEnd() { ! return tagEnd; ! } ! public String getTagLine() { ! return tagLine; ! } ! public void setTagContents(String tagContents) { ! this.tagContents = tagContents; ! } ! public String getUrlBeingParsed() { ! return urlBeingParsed; ! } ! public void setUrlBeingParsed(String baseUrl) { ! this.urlBeingParsed = baseUrl; ! } ! ! 
public boolean isEmptyXmlTag() { ! return isXmlEndTag; ! } ! /** ! * Returns the line number where the tag starts in the HTML. At the moment this ! * will only be valid for tags created with the ! * <code>CompositeTagScanner</code> or a subclass of it. ! */ ! public int getStartLine() { ! return startLine; ! } ! /** ! * Returns the line number where the tag ends in the HTML. At the moment this ! * will only be valid for tags created with the ! * <code>CompositeTagScanner</code> or a subclass of it. ! */ ! public int getEndLine() { ! return endLine; ! } ! } --- 30,110 ---- public class TagData { ! private int tagBegin; ! private int tagEnd; ! private int startLine; ! private int endLine; ! private String tagContents; ! private String tagLine; ! private String urlBeingParsed; ! private boolean isXmlEndTag; ! ! public TagData(int tagBegin, int tagEnd, String tagContents,String tagLine) { ! this(tagBegin, tagEnd, 0, 0, tagContents, tagLine, "", false); ! } ! ! public TagData(int tagBegin, int tagEnd, String tagContents,String tagLine, String urlBeingParsed) { ! this(tagBegin, tagEnd, 0, 0, tagContents, tagLine, urlBeingParsed, false); ! } ! ! public TagData(int tagBegin, int tagEnd, int startLine, int endLine, String tagContents,String tagLine, String urlBeingParsed, boolean isXmlEndTag) { ! this.tagBegin = tagBegin; ! this.tagEnd = tagEnd; ! this.startLine = startLine; ! this.endLine = endLine; ! this.tagContents = tagContents; ! this.tagLine = tagLine; ! this.urlBeingParsed = urlBeingParsed; ! this.isXmlEndTag = isXmlEndTag; ! } ! ! public int getTagBegin() { ! return tagBegin; ! } ! public String getTagContents() { ! return tagContents; ! } ! public int getTagEnd() { ! return tagEnd; ! } ! public String getTagLine() { ! return tagLine; ! } ! public void setTagContents(String tagContents) { ! this.tagContents = tagContents; ! } ! public String getUrlBeingParsed() { ! return urlBeingParsed; ! } ! public void setUrlBeingParsed(String baseUrl) { ! 
this.urlBeingParsed = baseUrl; ! } ! ! public boolean isEmptyXmlTag() { ! return isXmlEndTag; ! } ! /** ! * Returns the line number where the tag starts in the HTML. At the moment this ! * will only be valid for tags created with the ! * <code>CompositeTagScanner</code> or a subclass of it. ! */ ! public int getStartLine() { ! return startLine; ! } ! /** ! * Returns the line number where the tag ends in the HTML. At the moment this ! * will only be valid for tags created with the ! * <code>CompositeTagScanner</code> or a subclass of it. ! */ ! public int getEndLine() { ! return endLine; ! } ! } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv31228 Modified Files: AbstractNode.java Node.java NodeReader.java RemarkNode.java StringNode.java StringNodeFactory.java Log Message: Change tabs to spaces in all source files. Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/AbstractNode.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** AbstractNode.java 24 Aug 2003 21:59:41 -0000 1.10 --- AbstractNode.java 3 Sep 2003 23:36:18 -0000 1.11 *************** *** 37,54 **** */ public abstract class AbstractNode implements Node, Serializable { ! /** ! * The beginning position of the tag in the line ! */ ! protected int nodeBegin; ! /** ! * The ending position of the tag in the line ! */ ! protected int nodeEnd; ! /** ! * The parent of this node. ! */ ! protected Node parent; /** --- 37,54 ---- */ public abstract class AbstractNode implements Node, Serializable { ! /** ! * The beginning position of the tag in the line ! */ ! protected int nodeBegin; ! /** ! * The ending position of the tag in the line ! */ ! protected int nodeEnd; ! /** ! * The parent of this node. ! */ ! protected Node parent; /** *************** *** 62,214 **** * @param end The ending position of the node. */ ! public AbstractNode (int begin, int end) { ! nodeBegin = begin; ! nodeEnd = end; parent = null; ! } ! /** ! * Returns a string representation of the node. This is an important method, it allows a simple string transformation ! * of a web page, regardless of a node.<br> ! * Typical application code (for extracting only the text from a web page) would then be simplified to :<br> ! * <pre> ! * Node node; ! * for (Enumeration e = parser.elements();e.hasMoreElements();) { ! * node = (Node)e.nextElement(); ! 
* System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string ! * } ! * </pre> ! */ ! public abstract String toPlainTextString(); ! /** ! * This method will make it easier when using html parser to reproduce html pages (with or without modifications) ! * Applications reproducing html can use this method on nodes which are to be used or transferred as they were ! * recieved, with the original html ! */ ! public abstract String toHtml(); ! /** ! * Return the string representation of the node. ! * Subclasses must define this method, and this is typically to be used in the manner<br> ! * <pre>System.out.println(node)</pre> ! * @return java.lang.String ! */ ! public abstract String toString(); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * String filter = LinkTag.LINK_TAG_FILTER; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, filter); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. This of course implies that tags must ! 
* fulfill their responsibilities toward honouring certain filters. ! * ! * <B>Important:</B> In order to keep performance optimal, <B>do not create</B> you own filter strings, as ! * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not ! * make calls like : ! * <I>collectInto(collectionList,"-l")</I>, instead, make calls only like : ! * <I>collectInto(collectionList,LinkTag.LINK_TAG_FILTER)</I>.<P/> ! * ! * To find out if your desired tag has filtering support, check the API of the tag. ! */ ! public abstract void collectInto(NodeList collectionList, String filter); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, LinkTag.class); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. ! */ ! public void collectInto(NodeList collectionList, Class nodeType) { ! if (nodeType.getName().equals(this.getClass().getName())) { ! collectionList.add(this); ! } ! } ! /** ! * Returns the beginning position of the tag. ! */ ! 
public int elementBegin() { ! return nodeBegin; ! } ! /** ! * Returns the ending position fo the tag ! */ ! public int elementEnd() { ! return nodeEnd; ! } ! public abstract void accept(Object visitor); ! /** ! * @deprecated - use toHtml() instead ! */ ! public final String toHTML() { ! return toHtml(); ! } ! ! /** ! * Get the parent of this node. * This will always return null when parsing without scanners, * i.e. if semantic parsing was not performed. * The object returned from this method can be safely cast to a <code>CompositeTag</code>. ! * @return The parent of this node, if it's been set, <code>null</code> otherwise. ! */ ! public Node getParent () { ! return (parent); ! } /** ! * Sets the parent of this node. ! * @param node The node that contains this node. Must be a <code>CompositeTag</code>. ! */ ! public void setParent (Node node) { ! parent = node; ! } ! /** * Get the children of this node. * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. */ ! public NodeList getChildren () { return (children); --- 62,214 ---- * @param end The ending position of the node. */ ! public AbstractNode (int begin, int end) { ! nodeBegin = begin; ! nodeEnd = end; parent = null; ! } ! /** ! * Returns a string representation of the node. This is an important method, it allows a simple string transformation ! * of a web page, regardless of a node.<br> ! * Typical application code (for extracting only the text from a web page) would then be simplified to :<br> ! * <pre> ! * Node node; ! * for (Enumeration e = parser.elements();e.hasMoreElements();) { ! * node = (Node)e.nextElement(); ! * System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string ! * } ! * </pre> ! */ ! public abstract String toPlainTextString(); ! /** ! * This method will make it easier when using html parser to reproduce html pages (with or without modifications) ! 
* Applications reproducing html can use this method on nodes which are to be used or transferred as they were ! * recieved, with the original html ! */ ! public abstract String toHtml(); ! /** ! * Return the string representation of the node. ! * Subclasses must define this method, and this is typically to be used in the manner<br> ! * <pre>System.out.println(node)</pre> ! * @return java.lang.String ! */ ! public abstract String toString(); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * String filter = LinkTag.LINK_TAG_FILTER; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, filter); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. This of course implies that tags must ! * fulfill their responsibilities toward honouring certain filters. ! * ! * <B>Important:</B> In order to keep performance optimal, <B>do not create</B> you own filter strings, as ! * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not ! * make calls like : ! 
* <I>collectInto(collectionList,"-l")</I>, instead, make calls only like : ! * <I>collectInto(collectionList,LinkTag.LINK_TAG_FILTER)</I>.<P/> ! * ! * To find out if your desired tag has filtering support, check the API of the tag. ! */ ! public abstract void collectInto(NodeList collectionList, String filter); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, LinkTag.class); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. ! */ ! public void collectInto(NodeList collectionList, Class nodeType) { ! if (nodeType.getName().equals(this.getClass().getName())) { ! collectionList.add(this); ! } ! } ! /** ! * Returns the beginning position of the tag. ! */ ! public int elementBegin() { ! return nodeBegin; ! } ! /** ! * Returns the ending position fo the tag ! */ ! public int elementEnd() { ! return nodeEnd; ! } ! public abstract void accept(Object visitor); ! /** ! * @deprecated - use toHtml() instead ! */ ! public final String toHTML() { ! return toHtml(); ! } ! ! 
/** ! * Get the parent of this node. * This will always return null when parsing without scanners, * i.e. if semantic parsing was not performed. * The object returned from this method can be safely cast to a <code>CompositeTag</code>. ! * @return The parent of this node, if it's been set, <code>null</code> otherwise. ! */ ! public Node getParent () { ! return (parent); ! } /** ! * Sets the parent of this node. ! * @param node The node that contains this node. Must be a <code>CompositeTag</code>. ! */ ! public void setParent (Node node) { ! parent = node; ! } ! /** * Get the children of this node. * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. */ ! public NodeList getChildren () { return (children); *************** *** 219,223 **** * @param children The new list of children this node contains. */ ! public void setChildren (NodeList children) { this.children = children; --- 219,223 ---- * @param children The new list of children this node contains. */ ! public void setChildren (NodeList children) { this.children = children; *************** *** 225,241 **** /** ! * Returns the text of the string line ! */ ! public String getText() { ! return null; ! } ! ! /** ! * Sets the string contents of the node. ! * @param text The new text for the node. ! */ ! public void setText(String text) { ! } } --- 225,241 ---- /** ! * Returns the text of the string line ! */ ! public String getText() { ! return null; ! } ! ! /** ! * Sets the string contents of the node. ! * @param text The new text for the node. ! */ ! public void setText(String text) { ! } } Index: Node.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Node.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** Node.java 24 Aug 2003 21:59:41 -0000 1.36 --- Node.java 3 Sep 2003 23:36:18 -0000 1.37 *************** *** 34,146 **** public interface Node { ! 
/** ! * Returns a string representation of the node. This is an important method, it allows a simple string transformation ! * of a web page, regardless of a node.<br> ! * Typical application code (for extracting only the text from a web page) would then be simplified to :<br> ! * <pre> ! * Node node; ! * for (Enumeration e = parser.elements();e.hasMoreElements();) { ! * node = (Node)e.nextElement(); ! * System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string ! * } ! * </pre> ! */ ! public abstract String toPlainTextString(); ! /** ! * This method will make it easier when using html parser to reproduce html pages (with or without modifications) ! * Applications reproducing html can use this method on nodes which are to be used or transferred as they were ! * recieved, with the original html ! */ ! public abstract String toHtml(); ! /** ! * Return the string representation of the node. ! * Subclasses must define this method, and this is typically to be used in the manner<br> ! * <pre>System.out.println(node)</pre> ! * @return java.lang.String ! */ ! public abstract String toString(); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! 
* NodeList collectionList = new NodeList(); ! * Node node; ! * String filter = LinkTag.LINK_TAG_FILTER; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, filter); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. This of course implies that tags must ! * fulfill their responsibilities toward honouring certain filters. ! * ! * <B>Important:</B> In order to keep performance optimal, <B>do not create</B> you own filter strings, as ! * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not ! * make calls like : ! * <I>collectInto(collectionList,"-l")</I>, instead, make calls only like : ! * <I>collectInto(collectionList,LinkTag.LINK_TAG_FILTER)</I>.<P/> ! * ! * To find out if your desired tag has filtering support, check the API of the tag. ! */ ! public abstract void collectInto(NodeList collectionList, String filter); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! 
* node.collectInto (collectionVector, LinkTag.class); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. ! */ ! public abstract void collectInto(NodeList collectionList, Class nodeType); ! /** ! * Returns the beginning position of the tag. ! */ ! public abstract int elementBegin(); ! /** ! * Returns the ending position fo the tag ! */ ! public abstract int elementEnd(); ! public abstract void accept(Object visitor); ! /** ! * Get the parent of this node. * This will always return null when parsing without scanners, * i.e. if semantic parsing was not performed. * The object returned from this method can be safely cast to a <code>CompositeTag</code>. ! * @return The parent of this node, if it's been set, <code>null</code> otherwise. ! */ ! public abstract Node getParent (); /** ! * Sets the parent of this node. ! * @param node The node that contains this node. Must be a <code>CompositeTag</code>. ! */ ! public abstract void setParent (Node node); /** --- 34,146 ---- public interface Node { ! /** ! * Returns a string representation of the node. This is an important method, it allows a simple string transformation ! * of a web page, regardless of a node.<br> ! * Typical application code (for extracting only the text from a web page) would then be simplified to :<br> ! * <pre> ! * Node node; ! * for (Enumeration e = parser.elements();e.hasMoreElements();) { ! * node = (Node)e.nextElement(); ! * System.out.println(node.toPlainTextString()); // Or do whatever processing you wish with the plain text string ! * } ! * </pre> ! */ ! public abstract String toPlainTextString(); ! /** ! * This method will make it easier when using html parser to reproduce html pages (with or without modifications) ! * Applications reproducing html can use this method on nodes which are to be used or transferred as they were ! * recieved, with the original html ! */ ! public abstract String toHtml(); ! /** ! 
* Return the string representation of the node. ! * Subclasses must define this method, and this is typically to be used in the manner<br> ! * <pre>System.out.println(node)</pre> ! * @return java.lang.String ! */ ! public abstract String toString(); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * String filter = LinkTag.LINK_TAG_FILTER; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, filter); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. This of course implies that tags must ! * fulfill their responsibilities toward honouring certain filters. ! * ! * <B>Important:</B> In order to keep performance optimal, <B>do not create</B> you own filter strings, as ! * the internal matching occurs with the pre-existing filter string object (in the relevant class). i.e. do not ! * make calls like : ! * <I>collectInto(collectionList,"-l")</I>, instead, make calls only like : ! * <I>collectInto(collectionList,LinkTag.LINK_TAG_FILTER)</I>.<P/> ! * ! 
* To find out if your desired tag has filtering support, check the API of the tag. ! */ ! public abstract void collectInto(NodeList collectionList, String filter); ! /** ! * Collect this node and its child nodes (if-applicable) into the collection parameter, provided the node ! * satisfies the filtering criteria. <P/> ! * ! * This mechanism allows powerful filtering code to be written very easily, without bothering about collection ! * of embedded tags separately. e.g. when we try to get all the links on a page, it is not possible to get it ! * at the top-level, as many tags (like form tags), can contain links embedded in them. We could get the links ! * out by checking if the current node is a form tag, and going through its contents. However, this ties us down ! * to specific tags, and is not a very clean approach. <P/> ! * ! * Using collectInto(), programs get a lot shorter. Now, the code to extract all links from a page would look ! * like : ! * <pre> ! * NodeList collectionList = new NodeList(); ! * Node node; ! * for (NodeIterator e = parser.elements(); e.hasMoreNodes();) { ! * node = e.nextNode(); ! * node.collectInto (collectionVector, LinkTag.class); ! * } ! * </pre> ! * Thus, collectionList will hold all the link nodes, irrespective of how ! * deep the links are embedded. ! */ ! public abstract void collectInto(NodeList collectionList, Class nodeType); ! /** ! * Returns the beginning position of the tag. ! */ ! public abstract int elementBegin(); ! /** ! * Returns the ending position fo the tag ! */ ! public abstract int elementEnd(); ! public abstract void accept(Object visitor); ! /** ! * Get the parent of this node. * This will always return null when parsing without scanners, * i.e. if semantic parsing was not performed. * The object returned from this method can be safely cast to a <code>CompositeTag</code>. ! * @return The parent of this node, if it's been set, <code>null</code> otherwise. ! */ ! public abstract Node getParent (); /** ! 
* Sets the parent of this node. ! * @param node The node that contains this node. Must be a <code>CompositeTag</code>. ! */ ! public abstract void setParent (Node node); /** *************** *** 148,152 **** * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. */ ! public abstract NodeList getChildren (); /** --- 148,152 ---- * @return The list of children contained by this node, if it's been set, <code>null</code> otherwise. */ ! public abstract NodeList getChildren (); /** *************** *** 154,168 **** * @param children The new list of children this node contains. */ ! public abstract void setChildren (NodeList children); ! /** ! * Returns the text of the string line ! */ ! public String getText(); ! ! /** ! * Sets the string contents of the node. ! * @param text The new text for the node. ! */ ! public void setText(String text); } --- 154,168 ---- * @param children The new list of children this node contains. */ ! public abstract void setChildren (NodeList children); ! /** ! * Returns the text of the string line ! */ ! public String getText(); ! ! /** ! * Sets the string contents of the node. ! * @param text The new text for the node. ! */ ! public void setText(String text); } Index: NodeReader.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/NodeReader.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** NodeReader.java 24 Aug 2003 21:59:41 -0000 1.40 --- NodeReader.java 3 Sep 2003 23:36:18 -0000 1.41 *************** *** 52,98 **** public class NodeReader extends BufferedReader { ! public static final String DECIPHER_ERROR="NodeReader.readElement() : Error occurred while trying to decipher the tag using scanners"; ! protected int posInLine=-1; ! protected String line; ! protected Node node = null; ! protected TagScanner previousOpenScanner = null; ! protected String url; ! 
private Parser parser; ! private int lineCount; ! private String previousLine; ! private StringParser stringParser = new StringParser(); ! private RemarkNodeParser remarkNodeParser = new RemarkNodeParser(); ! private NodeList nextParsedNode = new NodeList(); ! private boolean dontReadNextLine=false; ! /** ! * The constructor takes in a reader object, it's length and the url to be read. ! */ ! public NodeReader(Reader in,int len,String url) ! { ! super(in, len); ! this.url = url; ! this.parser = null; ! this.lineCount = 1; ! } ! /** ! * This constructor basically overrides the existing constructor in the ! * BufferedReader class. * The URL defaults to an empty string. * @see #NodeReader(Reader,int,String) ! */ ! public NodeReader(Reader in, int len) ! { ! this(in,len,""); ! } ! /** ! * The constructor takes in a reader object, and the url to be read. * The buffer size defaults to 8192. * @see #NodeReader(Reader,int,String) ! */ ! public NodeReader(Reader in,String url) ! { ! this(in, 8192, url); ! } /** --- 52,98 ---- public class NodeReader extends BufferedReader { ! public static final String DECIPHER_ERROR="NodeReader.readElement() : Error occurred while trying to decipher the tag using scanners"; ! protected int posInLine=-1; ! protected String line; ! protected Node node = null; ! protected TagScanner previousOpenScanner = null; ! protected String url; ! private Parser parser; ! private int lineCount; ! private String previousLine; ! private StringParser stringParser = new StringParser(); ! private RemarkNodeParser remarkNodeParser = new RemarkNodeParser(); ! private NodeList nextParsedNode = new NodeList(); ! private boolean dontReadNextLine=false; ! /** ! * The constructor takes in a reader object, it's length and the url to be read. ! */ ! public NodeReader(Reader in,int len,String url) ! { ! super(in, len); ! this.url = url; ! this.parser = null; ! this.lineCount = 1; ! } ! /** ! * This constructor basically overrides the existing constructor in the ! 
* BufferedReader class. * The URL defaults to an empty string. * @see #NodeReader(Reader,int,String) ! */ ! public NodeReader(Reader in, int len) ! { ! this(in,len,""); ! } ! /** ! * The constructor takes in a reader object, and the url to be read. * The buffer size defaults to 8192. * @see #NodeReader(Reader,int,String) ! */ ! public NodeReader(Reader in,String url) ! { ! this(in, 8192, url); ! } /** *************** *** 105,177 **** } ! /** ! * This method is intended to be called only by scanners, when a situation of dirty html has arisen, ! * and action has been taken to correct the parsed tags. For e.g. if we have html of the form : ! * <pre> ! * <a href="somelink.html"><img src=...><td><tr><a href="someotherlink.html">...</a> ! * </pre> ! * Now to salvage the first link, we'd probably like to insert an end tag somewhere (typically before the ! * second begin link tag). So that the parsing continues uninterrupted, we will need to change the existing ! * line being parsed, to contain the end tag in it. ! */ ! public void changeLine(String line) { ! this.line = line; ! } ! public String getCurrentLine() { ! return line; ! } ! /** ! * Get the last line number that the reader has read ! * @return int last line number read by the reader ! */ ! public int getLastLineNumber() { ! return lineCount-1; ! } ! /** ! * This method is useful when designing your own scanners. You might need to find out what is the location where the ! * reader has stopped last. ! * @return int Last position read by the reader ! */ ! public int getLastReadPosition() { ! if (node!=null) return node.elementEnd(); else ! return 0; ! } ! /* ! * Read the next line ! * @return String containing the line ! */ ! public String getNextLine() ! { ! try ! { ! previousLine = line; ! line = readLine(); ! if (line!=null) ! lineCount++; ! posInLine = 0; ! return line; ! } ! catch (IOException e) ! { ! System.err.println("I/O Exception occurred while reading!"); ! } ! return null; ! } ! /** ! 
* Returns the parser object for which this reader exists ! * @return org.htmlparser.Parser ! */ ! public Parser getParser() { ! return parser; ! } ! /** ! * Gets the previousOpenScanner. ! * @return Returns a TagScanner ! */ ! public TagScanner getPreviousOpenScanner() { ! return previousOpenScanner; ! } /** --- 105,177 ---- } ! /** ! * This method is intended to be called only by scanners, when a situation of dirty html has arisen, ! * and action has been taken to correct the parsed tags. For e.g. if we have html of the form : ! * <pre> ! * <a href="somelink.html"><img src=...><td><tr><a href="someotherlink.html">...</a> ! * </pre> ! * Now to salvage the first link, we'd probably like to insert an end tag somewhere (typically before the ! * second begin link tag). So that the parsing continues uninterrupted, we will need to change the existing ! * line being parsed, to contain the end tag in it. ! */ ! public void changeLine(String line) { ! this.line = line; ! } ! public String getCurrentLine() { ! return line; ! } ! /** ! * Get the last line number that the reader has read ! * @return int last line number read by the reader ! */ ! public int getLastLineNumber() { ! return lineCount-1; ! } ! /** ! * This method is useful when designing your own scanners. You might need to find out what is the location where the ! * reader has stopped last. ! * @return int Last position read by the reader ! */ ! public int getLastReadPosition() { ! if (node!=null) return node.elementEnd(); else ! return 0; ! } ! /* ! * Read the next line ! * @return String containing the line ! */ ! public String getNextLine() ! { ! try ! { ! previousLine = line; ! line = readLine(); ! if (line!=null) ! lineCount++; ! posInLine = 0; ! return line; ! } ! catch (IOException e) ! { ! System.err.println("I/O Exception occurred while reading!"); ! } ! return null; ! } ! /** ! * Returns the parser object for which this reader exists ! * @return org.htmlparser.Parser ! */ ! public Parser getParser() { ! 
return parser; ! } ! /** ! * Gets the previousOpenScanner. ! * @return Returns a TagScanner ! */ ! public TagScanner getPreviousOpenScanner() { ! return previousOpenScanner; ! } /** *************** *** 202,241 **** /** ! * Read the next element ! * @return Node - The next node ! */ ! public Node readElement() throws ParserException { return (readElement (false)); } ! /** ! * Read the next element * @param balance_quotes If <code>true</code> string nodes are parsed * paying attention to single and double quotes, such that tag-like * strings are ignored if they are quoted. ! * @return Node - The next node ! */ ! public Node readElement(boolean balance_quotes) throws ParserException ! { ! try { ! if (nextParsedNode.size()>0) { ! node = nextParsedNode.elementAt(0); ! nextParsedNode.remove(0); ! return node; ! } ! if (readNextLine()) { ! do ! { ! line = getNextLine(); ! } ! while (line!=null && line.length()==0); ! ! } else if (dontReadNextLine) { ! dontReadNextLine = false; } else posInLine = getLastReadPosition() + 1; ! if (line==null) return null; --- 202,241 ---- /** ! * Read the next element ! * @return Node - The next node ! */ ! public Node readElement() throws ParserException { return (readElement (false)); } ! /** ! * Read the next element * @param balance_quotes If <code>true</code> string nodes are parsed * paying attention to single and double quotes, such that tag-like * strings are ignored if they are quoted. ! * @return Node - The next node ! */ ! public Node readElement(boolean balance_quotes) throws ParserException ! { ! try { ! if (nextParsedNode.size()>0) { ! node = nextParsedNode.elementAt(0); ! nextParsedNode.remove(0); ! return node; ! } ! if (readNextLine()) { ! do ! { ! line = getNextLine(); ! } ! while (line!=null && line.length()==0); ! ! } else if (dontReadNextLine) { ! dontReadNextLine = false; } else posInLine = getLastReadPosition() + 1; ! if (line==null) return null; *************** *** 255,263 **** } catch (Exception e) ! 
{ StringBuffer msgBuffer = new StringBuffer(); msgBuffer.append(DECIPHER_ERROR+"\n" + ! " Tag being processed : "+tag.getTagName()+"\n" + ! " Current Tag Line : "+tag.getTagLine() ); appendLineDetails(msgBuffer); --- 255,263 ---- } catch (Exception e) ! { StringBuffer msgBuffer = new StringBuffer(); msgBuffer.append(DECIPHER_ERROR+"\n" + ! " Tag being processed : "+tag.getTagName()+"\n" + ! " Current Tag Line : "+tag.getTagLine() ); appendLineDetails(msgBuffer); *************** *** 277,400 **** if (node!=null) return node; } ! ! return null; ! } catch (ParserException pe) { throw pe; } ! catch (Exception e) { ! StringBuffer msgBuffer = new StringBuffer("NodeReader.readElement() : Error occurred while trying to read the next element,"); ! StringWriter sw = new StringWriter(); ! e.printStackTrace(new PrintWriter(sw)); ! appendLineDetails(msgBuffer); ! msgBuffer.append("\n Caused by:\n").append(sw.getBuffer().toString ()); ! ParserException ex = new ParserException(msgBuffer.toString(),e); ! parser.getFeedback().error(msgBuffer.toString(),ex); ! throw ex; ! } ! } ! public void appendLineDetails(StringBuffer msgBuffer) { ! msgBuffer.append("\nat Line "); ! msgBuffer.append(getLineCount()); ! msgBuffer.append(" : "); ! msgBuffer.append(getLine()); ! msgBuffer.append("\nPrevious Line ").append(getLineCount()-1); ! msgBuffer.append(" : ").append(getPreviousLine()); ! } ! /** ! * Do we need to read the next line ? ! * @return true - yes/ false - no ! */ ! protected boolean readNextLine() ! { ! if (dontReadNextLine) { ! return false; ! } ! if (posInLine==-1 || (line!=null && node.elementEnd()+1>=line.length())) ! return true; ! else return false; ! } ! /** ! * The setParser method is used by the parser to put its own object into the reader. This happens internally, ! * so this method is not generally for use by the developer or the user. ! */ ! public void setParser(Parser newParser) { ! parser = newParser; ! } ! /** ! * Sets the previousOpenScanner. ! 
* @param previousOpenScanner The previousOpenScanner to set ! */ ! public void setPreviousOpenScanner(TagScanner previousOpenScanner) { ! this.previousOpenScanner = previousOpenScanner; ! } ! ! /** ! * @param lineSeparator New Line separator to be used ! */ ! public static void setLineSeparator(String lineSeparator) ! { ! Parser.setLineSeparator(lineSeparator); ! } ! ! /** ! * Gets the line seperator that is being used ! * @return String ! */ ! public static String getLineSeparator() ! { ! return (Parser.getLineSeparator()); ! } ! /** ! * Returns the lineCount. ! * @return int ! */ ! public int getLineCount() { ! return lineCount; ! } ! /** ! * Returns the previousLine. ! * @return String ! */ ! public String getPreviousLine() { ! return previousLine; ! } ! /** ! * Returns the line. ! * @return String ! */ ! public String getLine() { ! return line; ! } ! /** ! * Sets the lineCount. ! * @param lineCount The lineCount to set ! */ ! public void setLineCount(int lineCount) { ! this.lineCount = lineCount; ! } ! /** ! * Sets the posInLine. ! * @param posInLine The posInLine to set ! */ ! public void setPosInLine(int posInLine) { ! this.posInLine = posInLine; ! } ! public void reset() throws IOException { ! super.reset(); ! lineCount = 1; ! posInLine = -1; ! } ! public StringParser getStringParser() { ! return stringParser; ! } /** --- 277,400 ---- if (node!=null) return node; } ! ! return null; ! } catch (ParserException pe) { throw pe; } ! catch (Exception e) { ! StringBuffer msgBuffer = new StringBuffer("NodeReader.readElement() : Error occurred while trying to read the next element,"); ! StringWriter sw = new StringWriter(); ! e.printStackTrace(new PrintWriter(sw)); ! appendLineDetails(msgBuffer); ! msgBuffer.append("\n Caused by:\n").append(sw.getBuffer().toString ()); ! ParserException ex = new ParserException(msgBuffer.toString(),e); ! parser.getFeedback().error(msgBuffer.toString(),ex); ! throw ex; ! } ! } ! 
public void appendLineDetails(StringBuffer msgBuffer) { ! msgBuffer.append("\nat Line "); ! msgBuffer.append(getLineCount()); ! msgBuffer.append(" : "); ! msgBuffer.append(getLine()); ! msgBuffer.append("\nPrevious Line ").append(getLineCount()-1); ! msgBuffer.append(" : ").append(getPreviousLine()); ! } ! /** ! * Do we need to read the next line ? ! * @return true - yes/ false - no ! */ ! protected boolean readNextLine() ! { ! if (dontReadNextLine) { ! return false; ! } ! if (posInLine==-1 || (line!=null && node.elementEnd()+1>=line.length())) ! return true; ! else return false; ! } ! /** ! * The setParser method is used by the parser to put its own object into the reader. This happens internally, ! * so this method is not generally for use by the developer or the user. ! */ ! public void setParser(Parser newParser) { ! parser = newParser; ! } ! /** ! * Sets the previousOpenScanner. ! * @param previousOpenScanner The previousOpenScanner to set ! */ ! public void setPreviousOpenScanner(TagScanner previousOpenScanner) { ! this.previousOpenScanner = previousOpenScanner; ! } ! ! /** ! * @param lineSeparator New Line separator to be used ! */ ! public static void setLineSeparator(String lineSeparator) ! { ! Parser.setLineSeparator(lineSeparator); ! } ! ! /** ! * Gets the line seperator that is being used ! * @return String ! */ ! public static String getLineSeparator() ! { ! return (Parser.getLineSeparator()); ! } ! /** ! * Returns the lineCount. ! * @return int ! */ ! public int getLineCount() { ! return lineCount; ! } ! /** ! * Returns the previousLine. ! * @return String ! */ ! public String getPreviousLine() { ! return previousLine; ! } ! /** ! * Returns the line. ! * @return String ! */ ! public String getLine() { ! return line; ! } ! /** ! * Sets the lineCount. ! * @param lineCount The lineCount to set ! */ ! public void setLineCount(int lineCount) { ! this.lineCount = lineCount; ! } ! /** ! * Sets the posInLine. ! * @param posInLine The posInLine to set ! */ ! 
public void setPosInLine(int posInLine) { ! this.posInLine = posInLine; ! } ! public void reset() throws IOException { ! super.reset(); ! lineCount = 1; ! posInLine = -1; ! } ! public StringParser getStringParser() { ! return stringParser; ! } /** *************** *** 404,417 **** * @param nextParsedNode The node that will be returned next by the reader. */ ! public void addNextParsedNode(Node nextParsedNode) { ! this.nextParsedNode.prepend(nextParsedNode); ! } ! ! public boolean isDontReadNextLine() { ! return dontReadNextLine; ! } ! public void setDontReadNextLine(boolean dontReadNextLine) { ! this.dontReadNextLine = dontReadNextLine; ! } } --- 404,417 ---- * @param nextParsedNode The node that will be returned next by the reader. */ ! public void addNextParsedNode(Node nextParsedNode) { ! this.nextParsedNode.prepend(nextParsedNode); ! } ! ! public boolean isDontReadNextLine() { ! return dontReadNextLine; ! } ! public void setDontReadNextLine(boolean dontReadNextLine) { ! this.dontReadNextLine = dontReadNextLine; ! } } Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/RemarkNode.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** RemarkNode.java 24 Aug 2003 21:59:41 -0000 1.27 --- RemarkNode.java 3 Sep 2003 23:36:18 -0000 1.28 *************** *** 38,88 **** public class RemarkNode extends AbstractNode { ! public final static String REMARK_NODE_FILTER="-r"; ! ! /** ! * Tag contents will have the contents of the comment tag. ! */ ! String tagContents; ! /** ! * The HTMLRemarkTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param nodeBegin beginning position of the tag ! * @param nodeEnd ending position of the tag ! * @param tagContents contents of the remark tag ! */ ! public RemarkNode(int nodeBegin, int nodeEnd, String tagContents) ! { ! super(nodeBegin,nodeEnd); ! 
this.tagContents = tagContents; ! } ! /** ! * Returns the text contents of the comment tag. ! */ ! public String getText() ! { ! return tagContents; ! } ! public String toPlainTextString() { ! return tagContents; ! } ! public String toHtml() { ! return "<!--"+tagContents+"-->"; ! } ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { ! return "Comment Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n"; ! } ! public void collectInto(NodeList collectionList, String filter) { ! if (filter==REMARK_NODE_FILTER) collectionList.add(this); ! } ! public void accept(Object visitor) { ! ((NodeVisitor)visitor).visitRemarkNode(this); ! } } --- 38,88 ---- public class RemarkNode extends AbstractNode { ! public final static String REMARK_NODE_FILTER="-r"; ! ! /** ! * Tag contents will have the contents of the comment tag. ! */ ! String tagContents; ! /** ! * The HTMLRemarkTag is constructed by providing the beginning posn, ending posn ! * and the tag contents. ! * @param nodeBegin beginning position of the tag ! * @param nodeEnd ending position of the tag ! * @param tagContents contents of the remark tag ! */ ! public RemarkNode(int nodeBegin, int nodeEnd, String tagContents) ! { ! super(nodeBegin,nodeEnd); ! this.tagContents = tagContents; ! } ! /** ! * Returns the text contents of the comment tag. ! */ ! public String getText() ! { ! return tagContents; ! } ! public String toPlainTextString() { ! return tagContents; ! } ! public String toHtml() { ! return "<!--"+tagContents+"-->"; ! } ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { ! return "Comment Tag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd()+"\n"; ! } ! public void collectInto(NodeList collectionList, String filter) { ! if (filter==REMARK_NODE_FILTER) collectionList.add(this); ! } ! public void accept(Object visitor) { ! ((NodeVisitor)visitor).visitRemarkNode(this); ! 
} } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNode.java,v retrieving revision 1.35 retrieving revision 1.36 diff -C2 -d -r1.35 -r1.36 *** StringNode.java 24 Aug 2003 21:59:41 -0000 1.35 --- StringNode.java 3 Sep 2003 23:36:18 -0000 1.36 *************** *** 38,93 **** public class StringNode extends AbstractNode { ! public static final String STRING_FILTER="-string"; ! ! /** ! * The text of the string. ! */ ! protected StringBuffer textBuffer; ! /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param text The contents of the string line ! * @param textBegin The beginning position of the string ! * @param textEnd The ending positiong of the string ! */ ! public StringNode (StringBuffer text, int textBegin,int textEnd) ! { ! super(textBegin,textEnd); ! this.textBuffer = text; ! } ! /** ! * Returns the text of the string line ! */ ! public String getText() { ! return textBuffer.toString(); ! } /** * Sets the string contents of the node. * @param text The new text for the node. */ ! public void setText(String text) ! { ! textBuffer = new StringBuffer (text); ! } ! ! public String toPlainTextString() { ! return textBuffer.toString(); ! } ! ! public String toHtml() { ! return textBuffer.toString(); ! } ! ! public String toString() { ! return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! ! public void collectInto(NodeList collectionList, String filter) { ! if (filter==STRING_FILTER) collectionList.add(this); ! } ! public void accept(Object visitor) { ! ((NodeVisitor)visitor).visitStringNode(this); ! } } --- 38,93 ---- public class StringNode extends AbstractNode { ! public static final String STRING_FILTER="-string"; ! ! /** ! * The text of the string. ! */ ! protected StringBuffer textBuffer; ! /** ! * Constructor takes in the text string, beginning and ending posns. ! 
* @param text The contents of the string line ! * @param textBegin The beginning position of the string ! * @param textEnd The ending positiong of the string ! */ ! public StringNode (StringBuffer text, int textBegin,int textEnd) ! { ! super(textBegin,textEnd); ! this.textBuffer = text; ! } ! /** ! * Returns the text of the string line ! */ ! public String getText() { ! return textBuffer.toString(); ! } /** * Sets the string contents of the node. * @param text The new text for the node. */ ! public void setText(String text) ! { ! textBuffer = new StringBuffer (text); ! } ! ! public String toPlainTextString() { ! return textBuffer.toString(); ! } ! ! public String toHtml() { ! return textBuffer.toString(); ! } ! ! public String toString() { ! return "Text = "+getText()+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); ! } ! ! public void collectInto(NodeList collectionList, String filter) { ! if (filter==STRING_FILTER) collectionList.add(this); ! } ! public void accept(Object visitor) { ! ((NodeVisitor)visitor).visitStringNode(this); ! } } Index: StringNodeFactory.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/StringNodeFactory.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** StringNodeFactory.java 12 Jul 2003 00:33:59 -0000 1.1 --- StringNodeFactory.java 3 Sep 2003 23:36:18 -0000 1.2 *************** *** 8,70 **** public class StringNodeFactory implements Serializable { ! ! /** ! * Flag to tell the parser to decode strings returned by StringNode's toPlainTextString. ! * Decoding occurs via the method, org.htmlparser.util.Translate.decode() ! */ ! private boolean shouldDecodeNodes = false; ! /** ! * Flag to tell the parser to remove escape characters, like \n and \t, returned by StringNode's toPlainTextString. ! * Escape character removal occurs via the method, org.htmlparser.util.ParserUtils.removeEscapeCharacters() ! */ ! 
private boolean shouldRemoveEscapeCharacters = false; ! ! /** ! * Flag to tell the parser to convert non breaking space ! * (i.e. \u00a0) to a space (" "). If true, this will happen inside StringNode's toPlainTextString. ! */ ! private boolean shouldConvertNonBreakingSpace = false; ! public Node createStringNode( ! StringBuffer textBuffer, ! int textBegin, ! int textEnd) { ! Node newNode = new StringNode(textBuffer, textBegin, textEnd); ! if (shouldDecodeNodes()) ! newNode = new DecodingNode(newNode); ! if (shouldRemoveEscapeCharacters()) ! newNode = new EscapeCharacterRemovingNode(newNode); ! if (shouldConvertNonBreakingSpace()) ! newNode = new NonBreakingSpaceConvertingNode(newNode); ! return newNode; ! } ! ! /** ! * Tells the parser to decode nodes using org.htmlparser.util.Translate.decode() ! */ ! public void setNodeDecoding(boolean shouldDecodeNodes) { ! this.shouldDecodeNodes = shouldDecodeNodes; ! } ! public boolean shouldDecodeNodes() { ! return shouldDecodeNodes; ! } ! public void setEscapeCharacterRemoval(boolean shouldRemoveEscapeCharacters) { ! this.shouldRemoveEscapeCharacters = shouldRemoveEscapeCharacters; ! } ! public boolean shouldRemoveEscapeCharacters() { ! return shouldRemoveEscapeCharacters; ! } ! public void setNonBreakSpaceConversion(boolean shouldConvertNonBreakSpace) { ! this.shouldConvertNonBreakingSpace = shouldConvertNonBreakSpace; ! } ! ! public boolean shouldConvertNonBreakingSpace() { ! return shouldConvertNonBreakingSpace; ! } } --- 8,70 ---- public class StringNodeFactory implements Serializable { ! ! /** ! * Flag to tell the parser to decode strings returned by StringNode's toPlainTextString. ! * Decoding occurs via the method, org.htmlparser.util.Translate.decode() ! */ ! private boolean shouldDecodeNodes = false; ! /** ! * Flag to tell the parser to remove escape characters, like \n and \t, returned by StringNode's toPlainTextString. ! 
* Escape character removal occurs via the method, org.htmlparser.util.ParserUtils.removeEscapeCharacters() ! */ ! private boolean shouldRemoveEscapeCharacters = false; ! ! /** ! * Flag to tell the parser to convert non breaking space ! * (i.e. \u00a0) to a space (" "). If true, this will happen inside StringNode's toPlainTextString. ! */ ! private boolean shouldConvertNonBreakingSpace = false; ! public Node createStringNode( ! StringBuffer textBuffer, ! int textBegin, ! int textEnd) { ! Node newNode = new StringNode(textBuffer, textBegin, textEnd); ! if (shouldDecodeNodes()) ! newNode = new DecodingNode(newNode); ! if (shouldRemoveEscapeCharacters()) ! newNode = new EscapeCharacterRemovingNode(newNode); ! if (shouldConvertNonBreakingSpace()) ! newNode = new NonBreakingSpaceConvertingNode(newNode); ! return newNode; ! } ! ! /** ! * Tells the parser to decode nodes using org.htmlparser.util.Translate.decode() ! */ ! public void setNodeDecoding(boolean shouldDecodeNodes) { ! this.shouldDecodeNodes = shouldDecodeNodes; ! } ! public boolean shouldDecodeNodes() { ! return shouldDecodeNodes; ! } ! public void setEscapeCharacterRemoval(boolean shouldRemoveEscapeCharacters) { ! this.shouldRemoveEscapeCharacters = shouldRemoveEscapeCharacters; ! } ! public boolean shouldRemoveEscapeCharacters() { ! return shouldRemoveEscapeCharacters; ! } ! public void setNonBreakSpaceConversion(boolean shouldConvertNonBreakSpace) { ! this.shouldConvertNonBreakingSpace = shouldConvertNonBreakSpace; ! } ! ! public boolean shouldConvertNonBreakingSpace() { ! return shouldConvertNonBreakingSpace; ! } } |
From: <der...@us...> - 2003-09-03 23:36:52
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes In directory sc8-pr-cvs1:/tmp/cvs-serv31228/lexer/nodes Modified Files: AbstractNode.java RemarkNode.java StringNode.java TagNode.java Log Message: Change tabs to spaces in all source files. Index: AbstractNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/AbstractNode.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** AbstractNode.java 24 Aug 2003 21:59:41 -0000 1.2 --- AbstractNode.java 3 Sep 2003 23:36:18 -0000 1.3 *************** *** 49,53 **** * Create a lexeme. * Remember the page and start & end cursor positions. ! * @param page The page this tag was read from. * @param start The starting offset of this node within the page. * @param end The ending offset of this node within the page. --- 49,53 ---- * Create a lexeme. * Remember the page and start & end cursor positions. ! * @param page The page this tag was read from. * @param start The starting offset of this node within the page. * @param end The ending offset of this node within the page. Index: RemarkNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/RemarkNode.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** RemarkNode.java 24 Aug 2003 21:59:41 -0000 1.3 --- RemarkNode.java 3 Sep 2003 23:36:18 -0000 1.4 *************** *** 39,76 **** public class RemarkNode extends AbstractNode { ! public final static String REMARK_NODE_FILTER="-r"; ! ! /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. ! */ ! public RemarkNode (Page page, int start, int end) ! { ! super (page, start, end); ! } /** ! 
* Returns the text contents of the comment tag. * todo: this only works for the usual case. ! */ ! public String getText() ! { ! return (mPage.getText (elementBegin () + 4, elementEnd () - 3)); ! } public String toPlainTextString() { ! return (getText()); ! } ! public String toHtml() { ! return (mPage.getText (elementBegin (), elementEnd ())); ! } ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { Cursor start; Cursor end; --- 39,76 ---- public class RemarkNode extends AbstractNode { ! public final static String REMARK_NODE_FILTER="-r"; ! ! /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. ! */ ! public RemarkNode (Page page, int start, int end) ! { ! super (page, start, end); ! } /** ! * Returns the text contents of the comment tag. * todo: this only works for the usual case. ! */ ! public String getText() ! { ! return (mPage.getText (elementBegin () + 4, elementEnd () - 3)); ! } public String toPlainTextString() { ! return (getText()); ! } ! public String toHtml() { ! return (mPage.getText (elementBegin (), elementEnd ())); ! } ! /** ! * Print the contents of the remark tag. ! */ ! public String toString() ! { Cursor start; Cursor end; *************** *** 78,89 **** start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return ("Rem (" + start.toString () + "," + end.toString () + "): " + getText ()); ! } ! public void collectInto(NodeList collectionList, String filter) { ! if (filter==REMARK_NODE_FILTER) collectionList.add(this); ! } ! public void accept(Object visitor) { ! } } --- 78,89 ---- start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return ("Rem (" + start.toString () + "," + end.toString () + "): " + getText ()); ! } ! 
public void collectInto(NodeList collectionList, String filter) { ! if (filter==REMARK_NODE_FILTER) collectionList.add(this); ! } ! public void accept(Object visitor) { ! } } Index: StringNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/StringNode.java,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** StringNode.java 24 Aug 2003 21:59:41 -0000 1.3 --- StringNode.java 3 Sep 2003 23:36:19 -0000 1.4 *************** *** 40,55 **** public class StringNode extends AbstractNode { ! public static final String STRING_FILTER = "-string"; ! ! /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. ! */ ! public StringNode (Page page, int start, int end) ! { ! super (page, start, end); ! } /** --- 40,55 ---- public class StringNode extends AbstractNode { ! public static final String STRING_FILTER = "-string"; ! ! /** ! * Constructor takes in the text string, beginning and ending posns. ! * @param page The page this string is on. ! * @param start The beginning position of the string. ! * @param end The ending positiong of the string. ! */ ! public StringNode (Page page, int start, int end) ! { ! super (page, start, end); ! } /** *************** *** 89,93 **** public String toString () ! { Cursor start; Cursor end; --- 89,93 ---- public String toString () ! { Cursor start; Cursor end; *************** *** 95,100 **** start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return ("Txt (" + start.toString () + "," + end.toString () + "): " + getText ()); ! } --- 95,100 ---- start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! 
return ("Txt (" + start.toString () + "," + end.toString () + "): " + getText ()); ! } Index: TagNode.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/nodes/TagNode.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** TagNode.java 24 Aug 2003 21:59:41 -0000 1.5 --- TagNode.java 3 Sep 2003 23:36:19 -0000 1.6 *************** *** 48,69 **** public class TagNode extends AbstractNode { ! public static final String TYPE = "TAG"; ! /** ! * Constant used as value for the value of the tag name ! * in parseParameters (Kaarle Kaila 3.8.2001) ! */ ! public final static String TAGNAME = "$<TAGNAME>$"; ! public final static String EMPTYTAG = "$<EMPTYTAG>$"; public final static String NULLVALUE = "$<NULL>$"; public final static String NOTHING = "$<NOTHING>$"; ! private final static String EMPTY_STRING=""; ! ! private boolean emptyXmlTag = false; /** ! * The tag attributes. * Objects of type Attribute. ! */ ! protected Vector mAttributes; /** --- 48,69 ---- public class TagNode extends AbstractNode { ! public static final String TYPE = "TAG"; ! /** ! * Constant used as value for the value of the tag name ! * in parseParameters (Kaarle Kaila 3.8.2001) ! */ ! public final static String TAGNAME = "$<TAGNAME>$"; ! public final static String EMPTYTAG = "$<EMPTYTAG>$"; public final static String NULLVALUE = "$<NULL>$"; public final static String NOTHING = "$<NOTHING>$"; ! private final static String EMPTY_STRING=""; ! ! private boolean emptyXmlTag = false; /** ! * The tag attributes. * Objects of type Attribute. ! */ ! protected Vector mAttributes; /** *************** *** 106,171 **** } ! /** ! * Create a tag with the location and attributes provided ! * @param page The page this tag was read from. * @param start The starting offset of this node within the page. * @param end The ending offset of this node within the page. 
* @param attributes The list of attributes that were parsed in this tag. * @see Attribute ! */ ! public TagNode (Page page, int start, int end, Vector attributes) ! { ! super (page, start, end); mAttributes = attributes; ! } ! /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter ! */ ! public String getAttribute (String name) { ! return ((String)getAttributes().get(name.toUpperCase())); ! } ! /** ! * Set attribute with given key, value pair. ! * @param key ! * @param value ! */ ! public void setAttribute(String key, String value) { ! getAttributes ().put(key,value); ! } ! /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter ! * @deprecated use getAttribute instead ! */ ! public String getParameter(String name) { ! return (String)getAttributes().get (name.toUpperCase()); ! } ! ! /** ! * Gets the attributes in the tag. * NOTE: Values of the extended hashtable are two element arrays of String, * with the first element being the original name (not uppercased), * and the second element being the value. ! * @return Returns a special hashtable of attributes in two element String arrays. ! */ ! public Vector getAttributesEx() { ! return mAttributes; ! } ! /** ! * Gets the attributes in the tag. ! * @return Returns a Hashtable of attributes ! */ ! public Hashtable getAttributes() { Vector attributes; --- 106,171 ---- } ! /** ! * Create a tag with the location and attributes provided ! * @param page The page this tag was read from. * @param start The starting offset of this node within the page. * @param end The ending offset of this node within the page. * @param attributes The list of attributes that were parsed in this tag. * @see Attribute ! */ ! public TagNode (Page page, int start, int end, Vector attributes) ! { ! super (page, start, end); mAttributes = attributes; ! } ! /** ! 
* In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter ! */ ! public String getAttribute (String name) { ! return ((String)getAttributes().get(name.toUpperCase())); ! } ! /** ! * Set attribute with given key, value pair. ! * @param key ! * @param value ! */ ! public void setAttribute(String key, String value) { ! getAttributes ().put(key,value); ! } ! /** ! * In case the tag is parsed at the scan method this will return value of a ! * parameter not implemented yet ! * @param name of parameter ! * @deprecated use getAttribute instead ! */ ! public String getParameter(String name) { ! return (String)getAttributes().get (name.toUpperCase()); ! } ! ! /** ! * Gets the attributes in the tag. * NOTE: Values of the extended hashtable are two element arrays of String, * with the first element being the original name (not uppercased), * and the second element being the value. ! * @return Returns a special hashtable of attributes in two element String arrays. ! */ ! public Vector getAttributesEx() { ! return mAttributes; ! } ! /** ! * Gets the attributes in the tag. ! * @return Returns a Hashtable of attributes ! */ ! public Hashtable getAttributes() { Vector attributes; *************** *** 217,239 **** return (ret); ! } public String getTagName(){ ! return getParameter(TAGNAME); ! } /** ! * Return the text contained in this tag ! */ ! public String getText() { ! return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); ! } ! /** ! * Sets the attributes. ! * @param attributes The attribute collection to set. ! */ ! public void setAttributes (Hashtable attributes) { Vector att; --- 217,239 ---- return (ret); ! } public String getTagName(){ ! return getParameter(TAGNAME); ! } /** ! * Return the text contained in this tag ! */ ! public String getText() { ! return (mPage.getText (elementBegin () + 1, elementEnd () - 1)); ! } ! /** ! * Sets the attributes. ! 
* @param attributes The attribute collection to set. ! */ ! public void setAttributes (Hashtable attributes) { Vector att; *************** *** 261,276 **** quote = (char)0; attribute = new Attribute (key, value, quote); ! att.addElement (attribute); } ! this.mAttributes = att; ! } ! /** ! * Sets the attributes. * NOTE: Values of the extended hashtable are two element arrays of String, * with the first element being the original name (not uppercased), * and the second element being the value. ! * @param attribs The attribute collection to set. ! */ public void setAttributesEx (Vector attribs) { --- 261,276 ---- quote = (char)0; attribute = new Attribute (key, value, quote); ! att.addElement (attribute); } ! this.mAttributes = att; ! } ! /** ! * Sets the attributes. * NOTE: Values of the extended hashtable are two element arrays of String, * with the first element being the original name (not uppercased), * and the second element being the value. ! * @param attribs The attribute collection to set. ! */ public void setAttributesEx (Vector attribs) { *************** *** 278,312 **** } ! /** ! * Sets the nodeBegin. ! * @param tagBegin The nodeBegin to set ! */ ! public void setTagBegin(int tagBegin) { ! this.nodeBegin = tagBegin; ! } ! /** ! * Gets the nodeBegin. ! * @return The nodeBegin value. ! */ ! public int getTagBegin() { ! return (nodeBegin); ! } ! ! /** ! * Sets the nodeEnd. ! * @param tagEnd The nodeEnd to set ! */ ! public void setTagEnd(int tagEnd) { ! this.nodeEnd = tagEnd; ! } ! ! /** ! * Gets the nodeEnd. ! * @return The nodeEnd value. ! */ ! public int getTagEnd() { ! return (nodeEnd); ! } public void setText (String text) --- 278,312 ---- } ! /** ! * Sets the nodeBegin. ! * @param tagBegin The nodeBegin to set ! */ ! public void setTagBegin(int tagBegin) { ! this.nodeBegin = tagBegin; ! } ! /** ! * Gets the nodeBegin. ! * @return The nodeBegin value. ! */ ! public int getTagBegin() { ! return (nodeBegin); ! } ! ! /** ! * Sets the nodeEnd. ! 
* @param tagEnd The nodeEnd to set ! */ ! public void setTagEnd(int tagEnd) { ! this.nodeEnd = tagEnd; ! } ! ! /** ! * Gets the nodeEnd. ! * @return The nodeEnd value. ! */ ! public int getTagEnd() { ! return (nodeEnd); ! } public void setText (String text) *************** *** 324,337 **** public String toPlainTextString() { ! return EMPTY_STRING; ! } ! /** ! * A call to a tag's toHTML() method will render it in HTML. ! * @see org.htmlparser.Node#toHtml() ! */ ! public String toHtml() { ! StringBuffer ret; Vector attributes; Attribute attribute; --- 324,337 ---- public String toPlainTextString() { ! return EMPTY_STRING; ! } ! /** ! * A call to a tag's toHTML() method will render it in HTML. ! * @see org.htmlparser.Node#toHtml() ! */ ! public String toHtml() { ! StringBuffer ret; Vector attributes; Attribute attribute; *************** *** 339,343 **** ret = new StringBuffer (); attributes = getAttributesEx (); ! ret.append ("<"); if (0 < attributes.size ()) { --- 339,343 ---- ret = new StringBuffer (); attributes = getAttributesEx (); ! ret.append ("<"); if (0 < attributes.size ()) { *************** *** 352,367 **** } } ! if (isEmptyXmlTag ()) ret.append ("/"); ! ret.append (">"); ! return (ret.toString ()); } ! /** ! * Print the contents of the tag ! */ ! public String toString() ! { String tag; Cursor start; --- 352,367 ---- } } ! if (isEmptyXmlTag ()) ret.append ("/"); ! ret.append (">"); ! return (ret.toString ()); } ! /** ! * Print the contents of the tag ! */ ! public String toString() ! { String tag; Cursor start; *************** *** 375,380 **** start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return (tag + " (" + start.toString () + "," + end.toString () + "): " + getText ()); ! } /** --- 375,380 ---- start = new Cursor (getPage (), elementBegin ()); end = new Cursor (getPage (), elementEnd ()); ! return (tag + " (" + start.toString () + "," + end.toString () + "): " + getText ()); ! 
} /** *************** *** 395,446 **** * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) { ! } ! /** ! * Returns table of attributes in the tag ! * @return Hashtable ! * @deprecated This method is deprecated. Use getAttributes() instead. ! */ ! public Hashtable getParsed() { ! return getAttributes (); ! } ! /** ! * Sometimes, a scanner may need to request a re-evaluation of the ! * attributes in a tag. This may happen when there is some correction ! * activity. An example of its usage can be found in ImageTag. ! * <br> ! * <B>Note:<B> This is an intensive task, hence call only when ! * really necessary ! * @return Hashtable ! */ ! public Hashtable redoParseAttributes() { mAttributes = null; getAttributesEx (); ! return (getAttributes ()); ! } ! public void accept(Object visitor) { ! } ! public String getType() { ! return TYPE; ! } ! /** ! * Is this an empty xml tag of the form<br> ! * <tag/> ! * @return boolean ! */ ! public boolean isEmptyXmlTag() { ! return emptyXmlTag; ! } ! public void setEmptyXmlTag(boolean emptyXmlTag) { ! this.emptyXmlTag = emptyXmlTag; ! } } --- 395,446 ---- * @see org.htmlparser.Node#collectInto(NodeList, String) */ ! public void collectInto(NodeList collectionList, String filter) { ! } ! /** ! * Returns table of attributes in the tag ! * @return Hashtable ! * @deprecated This method is deprecated. Use getAttributes() instead. ! */ ! public Hashtable getParsed() { ! return getAttributes (); ! } ! /** ! * Sometimes, a scanner may need to request a re-evaluation of the ! * attributes in a tag. This may happen when there is some correction ! * activity. An example of its usage can be found in ImageTag. ! * <br> ! * <B>Note:<B> This is an intensive task, hence call only when ! * really necessary ! * @return Hashtable ! */ ! public Hashtable redoParseAttributes() { mAttributes = null; getAttributesEx (); ! return (getAttributes ()); ! } ! 
public void accept(Object visitor) { ! } ! public String getType() { ! return TYPE; ! } ! /** ! * Is this an empty xml tag of the form<br> ! * <tag/> ! * @return boolean ! */ ! public boolean isEmptyXmlTag() { ! return emptyXmlTag; ! } ! public void setEmptyXmlTag(boolean emptyXmlTag) { ! this.emptyXmlTag = emptyXmlTag; ! } } |