htmlparser-cvs Mailing List for HTML Parser (Page 40)
Brought to you by:
derrickoswald
You can subscribe to this list here.
2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(141) |
Jun
(108) |
Jul
(66) |
Aug
(127) |
Sep
(155) |
Oct
(149) |
Nov
(72) |
Dec
(72) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2004 |
Jan
(100) |
Feb
(36) |
Mar
(21) |
Apr
(3) |
May
(87) |
Jun
(28) |
Jul
(84) |
Aug
(5) |
Sep
(14) |
Oct
|
Nov
|
Dec
|
2005 |
Jan
(1) |
Feb
(39) |
Mar
(26) |
Apr
(38) |
May
(14) |
Jun
(10) |
Jul
|
Aug
|
Sep
(13) |
Oct
(8) |
Nov
(10) |
Dec
|
2006 |
Jan
|
Feb
(1) |
Mar
(17) |
Apr
(20) |
May
(28) |
Jun
(24) |
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
2015 |
Jan
|
Feb
|
Mar
(1) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <der...@us...> - 2003-09-22 00:44:12
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv21038 Modified Files: build.xml Log Message: Integrate Thumbelina into the release. Added executables and alter Release target to add thumbelina.jar. Index: build.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/build.xml,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** build.xml 21 Sep 2003 18:20:55 -0000 1.47 --- build.xml 22 Sep 2003 00:44:03 -0000 1.48 *************** *** 100,103 **** --- 100,108 ---- </target> + <target name="JDK1.4"> + <condition property="JDK1.4"> + <equals arg1="1.4" arg2="${ant.java.version}"/> + </condition> + </target> <target name="init" description="initialize version properties"> *************** *** 307,311 **** <!-- Create the Thumbelina jar --> ! <target name="thumbelina" depends="jarlexer" description="create thumbelina.jar"> <javac compiler="javac1.4" srcdir="${src}" debug="on" classpath="src:${dist}/lib/htmllexer.jar"> <include name="org/htmlparser/lexerapplications/thumbelina/**/*.java"/> --- 312,323 ---- <!-- Create the Thumbelina jar --> ! <target name="thumbelina" depends="JDK1.4,jarlexer" description="create thumbelina.jar" if="JDK1.4"> ! <!-- Create the distribution directory --> ! <mkdir dir="${dist}/lib"/> ! ! <echo message="**********************************"/> ! <echo message="* Creating thumbelina.jar.... *"/> ! <echo message="**********************************"/> ! <javac compiler="javac1.4" srcdir="${src}" debug="on" classpath="src:${dist}/lib/htmllexer.jar"> <include name="org/htmlparser/lexerapplications/thumbelina/**/*.java"/> *************** *** 417,425 **** <fileset dir="${resources}" includes="*.bat"/> <fileset dir="${resources}" includes="lexer"/> </copy> </target> <!-- The release directory structuring finishes here --> ! 
<target name="Release" depends="versionSource,jar,javadoc,CopyBatch" description="prepare the release files"> </target> --- 429,438 ---- <fileset dir="${resources}" includes="*.bat"/> <fileset dir="${resources}" includes="lexer"/> + <fileset dir="${resources}" includes="thumbelina"/> </copy> </target> <!-- The release directory structuring finishes here --> ! <target name="Release" depends="versionSource,jar,thumbelina,javadoc,CopyBatch" description="prepare the release files"> </target> |
From: <der...@us...> - 2003-09-22 00:44:12
|
Update of /cvsroot/htmlparser/htmlparser/resources In directory sc8-pr-cvs1:/tmp/cvs-serv21038/resources Added Files: runThumbelina.bat thumbelina Log Message: Integrate Thumbelina into the release. Added executables and alter Release target to add thumbelina.jar. --- NEW FILE: runThumbelina.bat --- java -Xmx256M -jar ..\lib\thumbelina.jar %1 %2 --- NEW FILE: thumbelina --- #! /bin/sh if [ -z "$HTMLPARSER_HOME" ] ; then ## resolve links - $0 may be a link to the home PRG="$0" progname=`basename "$0"` saveddir=`pwd` # need this for relative symlinks dirname_prg=`dirname "$PRG"` cd "$dirname_prg" while [ -h "$PRG" ] ; do ls=`ls -ld "$PRG"` link=`expr "$ls" : '.*-> \(.*\)$'` if expr "$link" : '/.*' > /dev/null; then PRG="$link" else PRG=`dirname "$PRG"`"/$link" fi done HTMLPARSER_HOME=`dirname "$PRG"`/.. cd "$saveddir" # make it fully qualified HTMLPARSER_HOME=`cd "$HTMLPARSER_HOME" && pwd` fi if [ -z "$JAVACMD" ] ; then if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables JAVACMD="$JAVA_HOME/jre/sh/java" else JAVACMD="$JAVA_HOME/bin/java" fi else JAVACMD=`which java 2> /dev/null ` if [ -z "$JAVACMD" ] ; then JAVACMD=java fi fi fi if [ ! -x "$JAVACMD" ] ; then echo "Error: JAVA_HOME is not defined correctly." echo " We cannot execute $JAVACMD" exit 1 fi HTMLPARSER_LIB="${HTMLPARSER_HOME}/lib" "$JAVACMD" -Xmx256M -jar "${HTMLPARSER_LIB}/thumbelina.jar" "$@" |
From: <der...@us...> - 2003-09-21 18:22:45
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer In directory sc8-pr-cvs1:/tmp/cvs-serv17026 Modified Files: Page.java Log Message: Remove 'oops' from thrown ParserExceptions. Index: Page.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Page.java 10 Sep 2003 03:38:18 -0000 1.13 --- Page.java 21 Sep 2003 18:22:39 -0000 1.14 *************** *** 107,111 **** catch (IOException ioe) { ! throw new ParserException ("oops", ioe); } try --- 107,111 ---- catch (IOException ioe) { ! throw new ParserException (ioe.getMessage (), ioe); } try *************** *** 115,119 **** catch (IOException ioe) { ! throw new ParserException ("oops2", ioe); } mIndex = new PageIndex (this); --- 115,119 ---- catch (IOException ioe) { ! throw new ParserException (ioe.getMessage (), ioe); } mIndex = new PageIndex (this); |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina In directory sc8-pr-cvs1:/tmp/cvs-serv16611/src/org/htmlparser/lexerapplications/thumbelina Added Files: Thumbelina.java package.html TileSet.java PicturePanel.java ThumbelinaFrame.java Picture.java Sequencer.java Log Message: Thumbelina Created a lexer GUI application to extract images behind thumbnails. Added a task in the ant build script - thumbelina - to create the jar file. You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode. Usage: java -Xmx256M thumbelina.jar [URL] --- NEW FILE: Thumbelina.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2003 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/Thumbelina.java,v $ // $Author: derrickoswald $ // $Date: 2003/09/21 18:20:56 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU [...1395 lines suppressed...] } return (!done); } } } /* * Revision Control Modification History * * $Log: Thumbelina.java,v $ * Revision 1.1 2003/09/21 18:20:56 derrickoswald * Thumbelina * Created a lexer GUI application to extract images behind thumbnails. * Added a task in the ant build script - thumbelina - to create the jar file. * You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode. 
* Usage: java -Xmx256M thumbelina.jar [URL] * * */ --- NEW FILE: package.html --- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <HTML> <HEAD> <TITLE>Thumbelina</TITLE> </HEAD> <BODY> Extract the images behind thumbnail images. This package is a demonstration of filtering the tags that are produced by the Lexer package. In this case the idea is to find links to known types of image file (.gif, .png and .jpg) that have as the link text a reference to a smaller or lower resolution image, often called a thumbnail image; hence the name. <p> Besides a lot of support code to provide a user interface, the heart of the process is found in <code>Thumbelina.extractImageLinks()</code>, which has a wee state machine that notes when an <IMG> tag is discovered within the body of an <A></A> tag pair. This triggers a fetch of the <code>HREF</code> (image file). <p> The fetch is performed in the background by the <code>Toolkit</code> image loading code which runs 4 threads (on my machine). When an image is received it is added to the list of pending images. This list is drained by the <code>Sequencer</code> as it presents images at fixed intervals. <p> The <code>TileSet</code> and <code>Picture</code> classes provide a framework for displaying the various sizes of image that arrive in a random way, while still being able to repaint the panel when required. <p> The images are only retained in memory long enough to get covered over by subsequent images, but in general, the manipulation of images is a memory intensive task which requires a higher than normal limit on the maximum heap memory, i.e. use the <code>-Xmx256M</code> command line switch to avoid <code>java.lang.OutOfMemoryError</code> messages. <p> The rest is just the UI code, that can be altered by intrepid programmers as they see fit. 
<p> <b>TODO</b> <li>Fix race condition that background thread adds new URL's after a reset.</li> <li>Send output to log window instead of URL's in titlebar.</li> <li>Add pending list items as greyed out items to the history list.</li> <li>Make status bar a pipeline with valves and limit switches (better on/off buttons).</li> <li>Fix race condition that sometimes doesn't resize PicturePanel with frame.</li> <li>Tree view.</li> <li>Drag and drop support.</li> <li>JavaHelp.</li> <li>Allow filter configuration.</li> <li>Handle OutOfMemoryError more gracefully (trap System.err?).</li> <li>Add more background threads.</li> <li>Find out how to honour reset on the image fetcher threads.</li> </BODY> </HTML> --- NEW FILE: TileSet.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2003 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/TileSet.java,v $ // $Author: derrickoswald $ // $Date: 2003/09/21 18:20:56 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.lexerapplications.thumbelina; import java.awt.Rectangle; import java.util.Enumeration; import java.util.Vector; /** * Class to track picture regions. */ public class TileSet /* extends java.awt.Canvas implements java.awt.event.ActionListener, java.awt.event.MouseListener, java.awt.event.WindowListener */ { /** * The list of Pictures. */ protected Vector mRegions; /** * Construct a tile set. */ public TileSet () { mRegions = new Vector (); } /** * Get the number of tiles in this collection. * @return The number of pictures showing. * Note that the same image and URL may be showing * (different pieces) in several locations. */ public int getSize () { return (mRegions.size ()); } /** * Get the list of pictures. * @return An enumeration over the picture objects in this set. */ public Enumeration getPictures () { return (mRegions.elements ()); } /** * Add a single picture to the list. * @param r The picture to add. */ public void add (final Picture r) { Vector regions; // this will be the new set Enumeration e; Picture rover; Rectangle intersection; Vector splits; Enumeration frags; regions = new Vector (); for (e = getPictures (); e.hasMoreElements (); ) { rover = (Picture)e.nextElement (); if (rover.intersects (r)) { intersection = rover.intersection (r); if (!intersection.equals (rover)) { // incoming lies completely within the existing picture // or touches the existing picture somehow splits = split (r, rover, false); for (frags = splits.elements (); frags.hasMoreElements (); ) regions.addElement ((Picture)frags.nextElement ()); } else // incoming covers existing... 
drop the existing picture // but be sure to release the image memory rover.setImage (null); } else // no conflict, keep the existing regions.addElement (rover); } regions.addElement (r); mRegions = regions; } /** * Split the large picture. * Strategy: split horizontally (full width strips top and bottom). * NOTE: top and bottom make sense only in terms of AWT coordinates. * @param small The incoming picture. * @param large The encompassing picture. The attributes of this one * are propagated to the fragments. * @param keep If <code>true</code>, the center area is kept, * otherwise discarded. * @return The fragments from the large picture. */ private Vector split ( final Picture small, final Picture large, final boolean keep) { Picture m; Vector ret; ret = new Vector (); if (large.intersects (small)) { Rectangle intersection = large.intersection (small); // if tops don't match split off the top if ((intersection.y + intersection.height) != (large.y + large.height)) { m = new Picture (large); m.y = (intersection.y + intersection.height); m.height = (large.y + large.height) - m.y; ret.addElement (m); } // if left sides don't match make a left fragment if (intersection.x != large.x) { m = new Picture (large); m.y = intersection.y; m.width = intersection.x - large.x; m.height = intersection.height; ret.addElement (m); } // the center bit if (keep) { m = new Picture (large); m.x = intersection.x; m.y = intersection.y; m.width = intersection.width; m.height = intersection.height; ret.addElement (m); } // if right sides don't match make a right fragment if ((intersection.x + intersection.width) != (large.x + large.width)) { m = new Picture (large); m.x = intersection.x + intersection.width; m.y = intersection.y; m.width = (large.x + large.width) - m.x; m.height = intersection.height; ret.addElement (m); } // if bottoms don't match split off the bottom if (intersection.y != large.y) { m = new Picture (large); m.height = (intersection.y - large.y); ret.addElement (m); } } 
return (ret); } /** * Find the Picture at position x,y * @param x The x coordinate of the point to examine. * @param y The y coordinate of the point to examine. * @return The picture at that point, or <code>null</code> * if there are none. */ public Picture pictureAt (final int x, final int y) { Picture m; Picture ret; ret = null; for (int i = 0; (null == ret) && (i < mRegions.size ()); i++) { m = (Picture)mRegions.elementAt (i); if (m.contains (x, y)) ret = m; } return (ret); } /** * Move the given picture to the top of the Z order. * @param picture The picture to add. */ public void bringToTop (final Picture picture) { Picture m; Picture ret; ret = null; for (int i = 0; (null == ret) && (i < mRegions.size ()); ) { m = (Picture)mRegions.elementAt (i); if (picture.same (m)) mRegions.removeElementAt (i); else i++; } add (picture); } // // // // Unit test. // // // // // and need to add: // extends // java.awt.Canvas // implements // java.awt.event.ActionListener, // java.awt.event.MouseListener, // java.awt.event.WindowListener // // to the class definition // // boolean mVerbose; // int mCounter; // java.awt.Point origin; // Rectangle last; // int type; // // static java.awt.MenuBar menuMain; // static java.awt.Menu Options; // static java.awt.MenuItem repeat; // static java.awt.MenuItem clear; // static java.awt.TextField status; // // // checks if adding the rectangle causes an overlap // boolean checkAdd (Rectangle r, Vector v) // { // Enumeration e; // boolean ret; // ret = false; // // for (e = v.elements (); !ret && e.hasMoreElements (); ) // ret = r.intersects ((Rectangle)e.nextElement ()); // // return (ret); // } // // void paintwait () // { // java.awt.Graphics g = getGraphics (); // if (null != g) // paint (g); // Thread.yield (); // try // { // Thread.sleep (1000); // } // catch (Exception exception) // { // } // } // // void add () // { // if (null != last) // { // Picture m = new Picture (last); // try // { // m.setURL (new URL 
("http://localhost/image#" + mCounter++)); // } // catch (java.net.MalformedURLException murle) // { // murle.printStackTrace (); // } // this.add (m); // repaint (); // } // } // // // // // WindowListener interface // // // public void windowOpened (java.awt.event.WindowEvent e) {} // public void windowClosing (java.awt.event.WindowEvent e) // { // System.exit (0); // } // public void windowClosed (java.awt.event.WindowEvent e) {} // public void windowIconified (java.awt.event.WindowEvent e) {} // public void windowDeiconified (java.awt.event.WindowEvent e) {} // public void windowActivated (java.awt.event.WindowEvent e) {} // public void windowDeactivated (java.awt.event.WindowEvent e) {} // // // // // ActionListener interface // // // public void actionPerformed (java.awt.event.ActionEvent event) // { // Object object = event.getSource(); // if (object == repeat) // add (); // else if (object == clear) // { // mRegions = new Vector (); // repaint (); // } // } // // // // // MouseListener Interface // // // // public void mouseClicked (java.awt.event.MouseEvent event) // { // if (mVerbose) // System.out.println ("DrawTarget.mouseClicked " + event); // } // // public void mouseReleased (java.awt.event.MouseEvent event) // { // if (mVerbose) // System.out.println ("DrawTarget.mouseReleased " + event); // if (null != origin) // { // last = new Rectangle ( // Math.min (origin.x, event.getX ()), // Math.min (origin.y, event.getY ()), // Math.abs (event.getX () - origin.x), // Math.abs (event.getY () - origin.y)); // add (); // origin = null; // } // } // // public void mouseEntered (java.awt.event.MouseEvent event) // { // if (mVerbose) // System.out.println ("DrawTarget.mouseEntered " + event); // } // // public void mouseExited (java.awt.event.MouseEvent event) // { // if (mVerbose) // System.out.println ("DrawTarget.mouseExited " + event); // } // // public void mousePressed (java.awt.event.MouseEvent event) // { // if (mVerbose) // System.out.println 
("DrawTarget.mousePressed " + event); // if (event.isMetaDown ()) // { // status.setText (getDetails (event.getX (), event.getY ())); // } // else // origin = new java.awt.Point (event.getX (), event.getY ()); // } // // public void update (java.awt.Graphics graphics) // { // paint (graphics); // } // // static final java.awt.Color[] mColours = // { // java.awt.Color.blue, // java.awt.Color.cyan, // java.awt.Color.gray, // java.awt.Color.green, // java.awt.Color.orange, // java.awt.Color.pink, // java.awt.Color.red, // java.awt.Color.yellow, // java.awt.Color.lightGray, // java.awt.Color.darkGray, // }; // // public void paint (java.awt.Graphics graphics) // { // java.awt.Dimension size = getSize (); // graphics.setColor (getBackground ()); // graphics.fillRect (0, 0, size.width + 1, size.height + 1); // // if (0 == mRegions.size ()) // { // graphics.setColor (getForeground ()); // graphics.drawString ( // "Click and drag to create a picture.", 10, 20); // graphics.drawString ( // "Right click a picture for details.", 10, 40); // } // else // { // Enumeration e = getPictures (); // while (e.hasMoreElements ()) // { // Picture m = (Picture)e.nextElement (); // String url = m.getURL ().toExternalForm (); // int n = url.indexOf ('#'); // n = Integer.parseInt (url.substring (n + 1)) // java.awt.Color colour = mColours[n % mColours.length]; // graphics.setColor (colour); // graphics.fillRect (m.x, m.y, m.width + 1, m.height + 1); // graphics.setColor (java.awt.Color.black); // graphics.drawRect (m.x, m.y, m.width, m.height); // } // checkOverlap (graphics); // } // } // // void checkOverlap (java.awt.Graphics graphics) // { // Picture m; // Picture _m; // Rectangle r; // // graphics.setColor (java.awt.Color.magenta); // for (int i = 0; i < mRegions.size (); i++) // { // m = (Picture)mRegions.elementAt (i); // for (int j = i + 1; j < mRegions.size (); j++) // { // _m = (Picture)mRegions.elementAt (j); // if (m.intersects (_m)) // { // r = m.intersection (_m); // 
System.out.println ( // "overlap (" // + r.x // + "," // + r.y // + ") (" // + (r.x + r.width) // + "," // + (r.y + r.height) // + ")"); // graphics.fillRect (r.x, r.y, r.width + 1, r.height + 1); // } // } // } // } // // String getDetails (int x, int y) // { // Picture m; // String ret; // // ret = null; // // // find the Picture // for (int i = 0; (null == ret) && (i < mRegions.size ()); i++) // { // m = (Picture)mRegions.elementAt (i); // if (m.contains (x, y)) // ret = m.toString (); // } // if (null == ret) // ret = ""; // // return (ret); // } // // public static void main (String[] args) // { // java.awt.Frame frame; // // frame = new java.awt.Frame (); // frame.setSize (400,400); // menuMain = new java.awt.MenuBar(); // Options = new java.awt.Menu ("Options"); // repeat = new java.awt.MenuItem("Repeat"); // Options.add (repeat); // clear = new java.awt.MenuItem("Clear"); // Options.add (clear); // // menuMain.add (Options); // frame.setMenuBar (menuMain); // // java.awt.Insets insets = frame.getInsets (); // // TileSet buffy = new TileSet (); // buffy.setLocation (insets.left + 10, insets.top + 10); // buffy.setBackground (java.awt.Color.lightGray.brighter ()); // buffy.setVisible (true); // // frame.add (buffy, "Center"); // status = new java.awt.TextField (); // frame.add (status, "South"); // // frame.addWindowListener (buffy); // buffy.addMouseListener (buffy); // repeat.addActionListener (buffy); // clear.addActionListener (buffy); // // frame.setVisible (true); // // } } /* * Revision Control Modification History * * $Log: TileSet.java,v $ * Revision 1.1 2003/09/21 18:20:56 derrickoswald * Thumbelina * Created a lexer GUI application to extract images behind thumbnails. * Added a task in the ant build script - thumbelina - to create the jar file. * You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode. 
* Usage: java -Xmx256M thumbelina.jar [URL] * * */ --- NEW FILE: PicturePanel.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2003 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/PicturePanel.java,v $ // $Author: derrickoswald $ // $Date: 2003/09/21 18:20:56 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.lexerapplications.thumbelina; import java.awt.Component; import java.awt.Dimension; import java.awt.Graphics; import java.awt.Image; import java.awt.Insets; import java.awt.Point; import java.awt.Rectangle; import java.awt.event.ComponentEvent; import java.awt.event.ComponentListener; import java.awt.event.HierarchyEvent; import java.awt.event.HierarchyListener; import java.awt.event.MouseEvent; import java.awt.event.MouseListener; import java.util.Enumeration; import java.util.HashSet; import javax.swing.JPanel; import javax.swing.JViewport; import javax.swing.Scrollable; import javax.swing.border.BevelBorder; /** * Hold and display a group of pictures. 
* @author derrick */ public class PicturePanel extends JPanel implements MouseListener, Scrollable, ComponentListener, HierarchyListener { /** * Scrolling unit increment (both directions). */ protected static final int UNIT_INCREMENT = 10; /** * Scrolling block increment (both directions). */ protected static final int BLOCK_INCREMENT = 100; /** * The thumbelina object in use. */ protected Thumbelina mThumbelina; /** * The display mosaic. */ protected TileSet mMosaic; /** * The preferred size of this component. * <code>null</code> initially, caches the results of * <code>calculatePreferredSize ()</code>. */ protected Dimension mPreferredSize; /** * Creates a new instance of PicturePanel * @param thumbelina The <code>Thumbelina</code> this panel is associated * with. */ public PicturePanel (final Thumbelina thumbelina) { mThumbelina = thumbelina; mMosaic = new TileSet (); mPreferredSize = null; setBorder (new BevelBorder (BevelBorder.LOWERED)); addMouseListener (this); addHierarchyListener (this); } /** * Clears the panel, discarding any existing images. */ public void reset () { mMosaic = new TileSet (); repaint (); } /** * Move the given picture to the top of the Z order. * Adds it, even if it doesn't exist. * Also puts the URL in the url text of the status bar. * @param picture The picture being brought forward. */ public void bringToTop (final Picture picture) { picture.reset (); mMosaic.bringToTop (picture); repaint (picture.x, picture.y, picture.width, picture.height); mThumbelina.mUrlText.setText (picture.getURL ().toExternalForm ()); } /** * Find a picture with the given URL in the panel. * This should really only be used to discover if the picture is still * visible. There could be more than one picture with the given URL * because it may be partially obscured by another picture, in which * case the pieces are each given their own picture object, but all * point at the same <code>URL</code> and <code>Image</code>. * @param url The url to locate. 
* @return The first picture encountered in the panel, * or null if the picture was not found. */ public Picture find (final String url) { Enumeration enumeration; Picture picture; Picture ret; ret = null; enumeration = mMosaic.getPictures (); while ((null == ret) && enumeration.hasMoreElements ()) { picture = (Picture)enumeration.nextElement (); if (url.equals (picture.getURL ().toExternalForm ())) ret = picture; } return (ret); } /** * Draw an image on screen. * @param picture The picture to draw. * @param add If <code>true</code>, the picture is added to the history. */ protected void draw (final Picture picture, final boolean add) { Component parent; boolean dolayout; Dimension before; Dimension after; parent = getParent (); dolayout = false; synchronized (mMosaic) { if (parent instanceof JViewport) { before = getPreferredSize (); mMosaic.add (picture); after = calculatePreferredSize (); if (after.width > before.width) dolayout = true; else after.width = before.width; if (after.height > before.height) dolayout = true; else after.height = before.height; if (dolayout) mPreferredSize = after; } else mMosaic.add (picture); } if (dolayout) revalidate (); repaint (picture.x, picture.y, picture.width, picture.height); if (add) mThumbelina.addHistory (picture.getURL ().toExternalForm ()); } /** * Updates this component. * @param graphics The graphics context in which to update the component. */ public void update (final Graphics graphics) { paint (graphics); } /** * Adjust the graphics clip region to account for insets. * @param graphics The graphics object to set the clip region for. 
*/ public void adjustClipForInsets (final Graphics graphics) { Dimension dim; Insets insets; Rectangle clip; dim = getSize (); insets = getInsets (); clip = graphics.getClipBounds (); if (clip.x < insets.left) clip.x = insets.left; if (clip.y < insets.top) clip.y = insets.top; if (clip.x + clip.width > dim.width - insets.right) clip.width = dim.width - insets.right - clip.x; if (clip.y + clip.height > dim.height - insets.bottom) clip.height = dim.height - insets.bottom - clip.y; graphics.setClip (clip.x, clip.y, clip.width, clip.height); } /** * Paints this component. * Runs through the list of tiles and for every one that intersects * the clip region performs a <code>drawImage()</code>. */ public void paint (final Graphics graphics) { Rectangle clip; Enumeration enumeration; HashSet set; // just so we don't draw things twice Picture picture; Image image; Point origin; int width; int height; adjustClipForInsets (graphics); clip = graphics.getClipBounds (); synchronized (mMosaic) { if (0 == mMosaic.getSize ()) super.paint (graphics); else { super.paint (graphics); enumeration = mMosaic.getPictures (); set = new HashSet (); while (enumeration.hasMoreElements ()) { picture = (Picture)enumeration.nextElement (); if ((null == clip) || (clip.intersects (picture))) { image = picture.getImage (); if (!set.contains (image)) { origin = picture.getOrigin (); width = image.getWidth (this); height = image.getHeight (this); graphics.drawImage (picture.getImage (), origin.x, origin.y, origin.x + width, origin.y + height, 0, 0, width, height, this); set.add (image); } } } } } } /** * Get the preferred size of the component. * @return The dimension of this component. */ public Dimension getPreferredSize () { if (null == mPreferredSize) setPreferredSize (calculatePreferredSize ()); else if ((0 == mPreferredSize.width) || (0 == mPreferredSize.height)) setPreferredSize (calculatePreferredSize ()); return (mPreferredSize); } /** * Sets the preferred size of this component. 
* @param dimension The new value to use for * <code>getPreferredSize()</code> until recalculated. */ public void setPreferredSize (final Dimension dimension) { mPreferredSize = dimension; } /** * Compute the preferred size of the component. * Computes the minimum bounding rectangle covering all the pictures in * the panel. It then does some funky stuff to handle * embedding in the view port of a scroll pane, basically asking * up the ancestor hierarchy what size is available, and filling it. * @return The optimal dimension for this component. */ protected Dimension calculatePreferredSize () { Enumeration enumeration; int x; int y; Picture picture; Component parent; Insets insets; Dimension ret; enumeration = mMosaic.getPictures (); x = 0; y = 0; picture = null; while (enumeration.hasMoreElements ()) { picture = (Picture)enumeration.nextElement (); if (picture.x + picture.width > x) x = picture.x + picture.width; if (picture.y + picture.height > y) y = picture.y + picture.height; } parent = getParent (); if (parent instanceof JViewport) { ret = parent.getSize (); insets = ((JViewport)parent).getInsets (); ret.width -= insets.left + insets.right; ret.height -= insets.top + insets.bottom; if ((0 != ret.width) || (0 != ret.height)) ret.width -= 2; // ... I dunno why, it just needs it if (ret.width < x) ret.width = x; if (ret.height < y) ret.height = y; } else { insets = getInsets (); x += insets.left + insets.right; y += insets.top + insets.bottom; ret = new Dimension (x, y); } return (ret); } // // MouseListener Interface // /** * Invoked when the mouse button has been clicked * (pressed and released) on a component. * <i>Not used.</i> * @param event The object providing details of the mouse event. */ public void mouseClicked (final MouseEvent event) { } /** * Invoked when a mouse button has been released on a component. * <i>Not used.</i> * @param event The object providing details of the mouse event. 
*/ public void mouseReleased (final MouseEvent event) { } /** * Invoked when the mouse enters a component. * <i>Not used.</i> * @param event The object providing details of the mouse event. */ public void mouseEntered (final MouseEvent event) { } /** * Invoked when the mouse exits a component. * <i>Not used.</i> * @param event The object providing details of the mouse event. */ public void mouseExited (final MouseEvent event) { } /** * Handle left click on a picture by bringing it to the top. * @param event The object providing details of the mouse event. */ public void mousePressed (final MouseEvent event) { Picture picture; if (!event.isMetaDown ()) { picture = mMosaic.pictureAt (event.getX (), event.getY ()); if (null != picture) bringToTop (picture); } } // // Scrollable interface // /** * Returns the preferred size of the viewport for a view component. * For example the preferredSize of a JList component is the size * required to accommodate all of the cells in its list however the * value of preferredScrollableViewportSize is the size required for * JList.getVisibleRowCount() rows. A component without any properties * that would effect the viewport size should just return * getPreferredSize() here. * * @return The preferredSize of a JViewport whose view is this Scrollable. * @see JViewport#getPreferredSize */ public Dimension getPreferredScrollableViewportSize () { return (getPreferredSize ()); } /** * Components that display logical rows or columns should compute * the scroll increment that will completely expose one new row * or column, depending on the value of orientation. Ideally, * components should handle a partially exposed row or column by * returning the distance required to completely expose the item. * <p> * Scrolling containers, like JScrollPane, will use this method * each time the user requests a unit scroll. 
* * @param visibleRect The view area visible within the viewport * @param orientation Either SwingConstants.VERTICAL or * SwingConstants.HORIZONTAL. * @param direction Less than zero to scroll up/left, * greater than zero for down/right. * @return The "unit" increment for scrolling in the specified direction. * This value should always be positive. */ public int getScrollableUnitIncrement ( final Rectangle visibleRect, final int orientation, final int direction) { return (UNIT_INCREMENT); } /** * Components that display logical rows or columns should compute * the scroll increment that will completely expose one block * of rows or columns, depending on the value of orientation. * <p> * Scrolling containers, like JScrollPane, will use this method * each time the user requests a block scroll. * * @param visibleRect The view area visible within the viewport * @param orientation Either SwingConstants.VERTICAL or * SwingConstants.HORIZONTAL. * @param direction Less than zero to scroll up/left, * greater than zero for down/right. * @return The "block" increment for scrolling in the specified direction. * This value should always be positive. */ public int getScrollableBlockIncrement ( final Rectangle visibleRect, final int orientation, final int direction) { return (BLOCK_INCREMENT); } /** * Return true if a viewport should always force the width of this * <code>Scrollable</code> to match the width of the viewport. * For example a normal * text view that supported line wrapping would return true here, since it * would be undesirable for wrapped lines to disappear beyond the right * edge of the viewport. Note that returning true for a Scrollable * whose ancestor is a JScrollPane effectively disables horizontal * scrolling. * <p> * Scrolling containers, like JViewport, will use this method each * time they are validated. * * @return <code>true</code> if a viewport should force the Scrollables * width to match its own. 
*/ public boolean getScrollableTracksViewportWidth () { return (false); } /** * Return true if a viewport should always force the height of this * Scrollable to match the height of the viewport. For example a * columnar text view that flowed text in left to right columns * could effectively disable vertical scrolling by returning * true here. * <p> * Scrolling containers, like JViewport, will use this method each * time they are validated. * * @return <code>true</code> if a viewport should force the Scrollables * height to match its own. */ public boolean getScrollableTracksViewportHeight () { return (false); } // // ComponentListener interface // /** * Invoked when the container's size changes. * Un-caches the preferred size. * @param event The resize event. */ public void componentResized (final ComponentEvent event) { setPreferredSize (null); } /** * Invoked when the component's position changes. * <i>Not used.</I> * @param event The component event. */ public void componentMoved (final ComponentEvent event) { } /** * Invoked when the component has been made visible. * <i>Not used.</I> * @param event The component event. */ public void componentShown (final ComponentEvent event) { } /** * Invoked when the component has been made invisible. * <i>Not used.</I> * @param event The component event. */ public void componentHidden (final ComponentEvent event) { } // // HierarchyListener interface // /** * Handles this components ancestor being added to a container. * Registers this component as a listener for size changes on the * ancestor so that we may un-cache the prefereed size and force * a recalculation. * @param event The heirarchy event. 
*/
    public void hierarchyChanged (final HierarchyEvent event)
    {
        // only parent changes are of interest
        if (0 == (event.getChangeFlags () & HierarchyEvent.PARENT_CHANGED))
            return;

        final Component changed = event.getChanged ();
        final Component parent = getParent ();
        // listen to the changed component only when it is our grandparent
        if ((null != parent) && (changed == parent.getParent ()))
            changed.addComponentListener (this);
    }
}

/*
 * Revision Control Modification History
 *
 * $Log: PicturePanel.java,v $
 * Revision 1.1 2003/09/21 18:20:56 derrickoswald
 * Thumbelina
 * Created a lexer GUI application to extract images behind thumbnails.
 * Added a task in the ant build script - thumbelina - to create the jar file.
 * You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode.
 * Usage: java -Xmx256M thumbelina.jar [URL]
 *
 *
 */
--- NEW FILE: ThumbelinaFrame.java ---
// HTMLParser Library $Name: $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2003 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/ThumbelinaFrame.java,v $
// $Author: derrickoswald $
// $Date: 2003/09/21 18:20:56 $
// $Revision: 1.1 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
[...1054 lines suppressed...]
|| args[0].equalsIgnoreCase ("-help") || args[0].equalsIgnoreCase ("-h") || args[0].equalsIgnoreCase ("?") || args[0].equalsIgnoreCase ("-?")) Thumbelina.help (); else url = args[0]; try { frame = new ThumbelinaFrame (url); frame.setVisible (true); } catch (MalformedURLException murle) { System.err.println (murle.getMessage ()); Thumbelina.help (); } } } --- NEW FILE: Picture.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2003 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/Picture.java,v $ // $Author: derrickoswald $ // $Date: 2003/09/21 18:20:56 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.lexerapplications.thumbelina; import java.awt.Image; import java.awt.Point; import java.awt.Rectangle; import java.net.URL; /** * Class to track pictures within the frame. * Maintains an image, an area and the URL for it. */ public class Picture extends Rectangle { /** * The origin for new points from the zero args constructor. */ public static final Point ORIGIN = new Point (0, 0); /** * The URL for the picture. */ protected URL mURL; /** * The image for the picture. 
*/ protected Image mImage; /** * The upper left hand corner of the image. * This doesn't change, even if the image is cropped. * For example, if the left half of the image is obscured by another, * the <code>Rectangle</code> fields <code>x</code>, <code>y</code>, * <code>width</code> and <code>height</code> will change, but the * origin remains the same. */ protected Point mOrigin; /** * Construct a Picture. */ public Picture () { setURL (null); setImage (null); setOrigin (ORIGIN); } /** * Construct a Picture over the area given. * @param x The x coordinate. * @param y The y coordinate. * @param width The width of the picture. * @param height The height of the picture. */ public Picture (final int x, final int y, final int width, final int height) { super (x, y, width, height); setURL (null); setImage (null); setOrigin (new Point (x, y)); } /** * Construct a picture over the rectangle given. * @param r The coordinates of the area. */ public Picture (final Rectangle r) { super (r); setURL (null); setImage (null); setOrigin (new Point (r.x, r.y)); } /** * Construct a picture from the one given. * @param picture The picture to copy. */ public Picture (final Picture picture) { super (picture); setURL (picture.getURL ()); setImage (picture.getImage ()); setOrigin (picture.getOrigin ()); } /** * Getter for property URL. * @return Value of property URL. */ public URL getURL () { return (mURL); } /** * Setter for property URL. * @param url New value of property URL. */ public void setURL (final URL url) { mURL = url; } /** Getter for property image. * @return Value of property image. */ public Image getImage () { return (mImage); } /** Setter for property image. * @param image New value of property image. */ public void setImage (final Image image) { mImage = image; if (null != image) { width = image.getWidth (null); height = image.getHeight (null); } } /** Getter for property origin. * @return Value of property origin. 
*/ public Point getOrigin () { return (mOrigin); } /** Setter for property origin. * @param origin New value of property origin. */ public void setOrigin (final Point origin) { mOrigin = origin; } /** * Return <code>true</code> if that picture is the same as this one. * @param picture The picture to check. * @return <code>true</code> if the images match. */ public boolean same (final Picture picture) { return (mImage == picture.mImage); } /** * Reset the picture to uncropped size. */ public void reset () { setBounds (mOrigin.x, mOrigin.y, mImage.getWidth (null), mImage.getHeight (null)); } /** * Create a string representation of the modifcation. * @return A string that shows this picture. */ public String toString () { StringBuffer ret; ret = new StringBuffer (); ret.append (getURL ().toString ()); ret.append ("[x="); ret.append (Integer.toString (x)); ret.append (",y="); ret.append (Integer.toString (y)); ret.append (",width="); ret.append (Integer.toString (width)); ret.append (",height="); ret.append (Integer.toString (height)); ret.append ("]"); return (ret.toString ()); } } /* * Revision Control Modification History * * $Log: Picture.java,v $ * Revision 1.1 2003/09/21 18:20:56 derrickoswald * Thumbelina * Created a lexer GUI application to extract images behind thumbnails. * Added a task in the ant build script - thumbelina - to create the jar file. * You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode. 
* Usage: java -Xmx256M thumbelina.jar [URL]
 *
 *
 */
--- NEW FILE: Sequencer.java ---
// HTMLParser Library $Name: $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2003 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina/Sequencer.java,v $
// $Author: derrickoswald $
// $Date: 2003/09/21 18:20:56 $
// $Revision: 1.1 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.lexerapplications.thumbelina;

import java.awt.Component;
import java.awt.Dimension;
import java.awt.Image;
import java.awt.Insets;
import java.awt.Point;
import java.net.URL;
import java.util.ArrayList;
import java.util.Random;
import javax.swing.JViewport;

/**
 * Display received images at a constant rate.
 * Pictures handed to {@link #add(Image,URL)} are queued on a pending list;
 * the sequencer's own thread takes them off one at a time, places each on
 * the picture panel and then pauses for the configured delay.
 */
public class Sequencer
    extends
        Thread
{
    /**
     * The default delay time, {@value} milliseconds.
     */
    protected static final int DEFAULT_DELAY = 500;

    /**
     * The thumbelina object to drive.
     */
    protected Thumbelina mThumbelina;

    /**
     * Pictures awaiting display.
     * Also serves as the lock and wait/notify monitor for the queue.
     */
    protected ArrayList mPending;

    /**
     * Activity state.
     * <code>true</code> means fetching and displaying, <code>false</code> not.
     */
    protected boolean mActive;

    /**
     * Delay between picture displays.
     */
    protected int mDelay;

    /**
     * Random number generator for picture placement.
     */
    protected Random mRandom;

    /**
     * Creates a new instance of a Sequencer.
     * The thread is started as the last step of construction.
     * @param thumbelina The object to push images to.
     */
    public Sequencer (final Thumbelina thumbelina)
    {
        mThumbelina = thumbelina;
        mPending = new ArrayList ();
        mActive = true;
        setDelay (DEFAULT_DELAY);
        mRandom = new Random ();
        setName ("Sequencer"); // only good if there's just one of these
        start ();
    }

    /**
     * Clears the pending images list.
     * Also zeroes the ready-progress indicator and wakes the sequencer
     * thread if it is waiting on the list.
     */
    public void reset ()
    {
        synchronized (mPending)
        {
            mPending.clear ();
            mThumbelina.mReadyProgress.setValue (0);
            mPending.notify ();
        }
    }

    /**
     * Compute a random point to load the image.
     * Generate a random point for one of the corners of the image and
     * then condition the numbers so the image is on screen.
     * The generator is re-seeded from the arguments on every call, so
     * equal arguments (with an unchanged display area) give the same point.
     * @param url The url this picture was fetched from.
     * Used in computing the random position, so the picture is always
     * placed in the same location, even when refetched.
     * @param width The width of the image.
     * @param height The height of the image.
     * @return The random point to use.
     */
    protected Point random (final String url, final int width, final int height)
    {
        Component parent;
        Component grandparent;
        Dimension dim;
        Insets insets;
        int minx;
        int miny;
        int maxx;
        int maxy;
        int rndx;
        int rndy;
        int corner;
        Point ret;

        // the display area is the scroll pane when there is one,
        // otherwise the picture panel itself
        parent = mThumbelina.getPicturePanel ().getParent ();
        if (parent instanceof JViewport)
        {
            grandparent = parent.getParent (); // JScrollPane
            dim = grandparent.getSize ();
        }
        else
            dim = mThumbelina.getPicturePanel ().getSize ();
        insets = mThumbelina.getPicturePanel ().getInsets ();
        dim.width -= (insets.left + insets.right);
        dim.height -= (insets.top + insets.bottom);
        minx = insets.left;
        miny = insets.top;
        maxx = minx + dim.width;
        maxy = miny + dim.height;

        // the order of the three nextDouble() calls fixes the placement
        mRandom.setSeed ((((long)(width + height)) << 32) + url.hashCode ());
        rndx = (int)(mRandom.nextDouble () * dim.width);
        rndy = (int)(mRandom.nextDouble () * dim.height);
        corner = (int)(mRandom.nextDouble () * 4); // the panel has four corners
        ret = new Point (0, 0);
        switch (corner)
        {
            case 0: // upper left
                if (rndx + width >= maxx)
                    ret.x = maxx - width;
                else
                    ret.x = rndx;
                if (rndy + height >= maxy)
                    ret.y = maxy - height;
                else
                    ret.y = rndy;
                break;
            case 1: // upper right
                if (rndx - width < minx)
                    ret.x = minx;
                else
                    ret.x = rndx - width;
                if (rndy + height >= maxy)
                    ret.y = maxy - height;
                else
                    ret.y = rndy;
                break;
            case 2: // lower right
                if (rndx - width < minx)
                    ret.x = minx;
                else
                    ret.x = rndx - width;
                if (rndy - height < miny)
                    ret.y = miny;
                else
                    ret.y = rndy - height;
                break;
            case 3: // lower left
                if (rndx + width >= maxx)
                    ret.x = maxx - width;
                else
                    ret.x = rndx;
                if (rndy - height < miny)
                    ret.y = miny;
                else
                    ret.y = rndy - height;
                break;
            default:
                throw new IllegalStateException ("random corner = " + corner);
        }

        // if it's really large stuff it in the upper left hand corner
        if (ret.x < 0)
            ret.x = 0;
        if (ret.y < 0)
            ret.y = 0;

        return (ret);
    }

    /**
     * Add an image to the pending list.
     * @param image The image to add.
     * @param url The url the image came from.
     */
    public void add (final Image image, final URL url)
    {
        add (image, url, true);
    }

    /**
     * Add an image to the panel.
     * In background mode the picture is queued, the ready-progress
     * indicator is updated and the sequencer thread is notified;
     * otherwise the picture is placed immediately on the calling thread
     * without being added to the history.
     * @param image The image to add, must not be <code>null</code>.
     * @param url The url the image came from.
     * @param background If <code>true</code>, just add to pending list.
     */
    public void add (final Image image, final URL url, final boolean background)
    {
        Picture picture;
        int size;

        // fail fast rather than queue a picture that cannot be drawn
        if (null == image)
            throw new NullPointerException ("image");
        picture = new Picture ();
        picture.setImage (image);
        picture.setURL (url);
        if (background)
            synchronized (mPending)
            {
                mPending.add (picture);
                size = mPending.size ();
                if (mThumbelina.mReadyProgress.getMaximum () < size)
                    mThumbelina.mReadyProgress.setMaximum (size);
                mThumbelina.mReadyProgress.setValue (size);
                mPending.notify ();
            }
        else
            place (picture, false);
    }

    /**
     * Place a picture in the display area.
     * A picture whose origin is still the shared
     * <code>Picture.ORIGIN</code> instance has never been placed and is
     * given a random location first; the picture is then handed to the
     * picture panel to draw. A picture without a URL is placed using an
     * empty string as its placement key.
     * @param picture The picture to place on screen.
     * @param add If <code>true</code>, the picture is added to the history.
     */
    protected void place (final Picture picture, final boolean add)
    {
        Point p;
        URL url;
        String key;

        if (Picture.ORIGIN == picture.getOrigin ())
        {   // never been placed before
            url = picture.getURL ();
            key = (null == url) ? "" : url.toExternalForm ();
            p = random (key, picture.width, picture.height);
            picture.x = p.x;
            picture.y = p.y;
            picture.setOrigin (p);
        }
        mThumbelina.getPicturePanel ().draw (picture, add);
    }

    //
    // Runnable interface
    //

    /**
     * Display pictures from pending list with delay between.
     * If the list is empty (or the sequencer is inactive) it waits on the
     * pending list until notified. The loop never terminates: every
     * exception, including an interrupt, is printed and the loop continues.
     */
    public void run ()
    {
        Picture picture;
        int size;
        int delay;

        while (true)
        {
            try
            {
                picture = null;
                synchronized (mPending)
                {
                    if (mActive && !mPending.isEmpty ())
                        picture = (Picture)mPending.remove (0);
                    else
                        try
                        {
                            mPending.wait ();
                        }
                        catch (InterruptedException ie)
                        {
                            // NOTE(review): the interrupt is only logged and
                            // the loop carries on - confirm nothing relies on
                            // interrupt() to stop this thread.
                            ie.printStackTrace ();
                        }
                    // keep the progress indicator in step with the queue
                    size = mPending.size ();
                    if (mThumbelina.mReadyProgress.getMaximum () < size)
                        mThumbelina.mReadyProgress.setMaximum (size);
                    mThumbelina.mReadyProgress.setValue (size);
                }
                if (null != picture)
                {
                    place (picture, true);
                    // read once so the test and the sleep use the same value
                    delay = getDelay ();
                    if (0 != delay)
                        try
                        {
                            sleep (delay);
                        }
                        catch (InterruptedException ie)
                        {
                            ie.printStackTrace ();
                        }
                }
            }
            catch (Throwable t)
            {
                t.printStackTrace ();
            }
        }
    }

    /**
     * Getter for property delay.
     * @return Value of property delay.
     */
    public int getDelay ()
    {
        return (mDelay);
    }

    /**
     * Setter for property delay.
     * @param delay New value of property delay.
     */
    public void setDelay (final int delay)
    {
        mDelay = delay;
    }
}

/*
 * Revision Control Modification History
 *
 * $Log: Sequencer.java,v $
 * Revision 1.1 2003/09/21 18:20:56 derrickoswald
 * Thumbelina
 * Created a lexer GUI application to extract images behind thumbnails.
 * Added a task in the ant build script - thumbelina - to create the jar file.
 * You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode.
 * Usage: java -Xmx256M thumbelina.jar [URL]
 *
 *
 */
|
From: <der...@us...> - 2003-09-21 18:20:59
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv16611 Modified Files: build.xml Log Message: Thumbelina Created a lexer GUI application to extract images behind thumbnails. Added a task in the ant build script - thumbelina - to create the jar file. You need JDK 1.4.x to build it. It can be run on JDK 1.3.x in crippled mode. Usage: java -Xmx256M thumbelina.jar [URL] Index: build.xml =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/build.xml,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** build.xml 10 Sep 2003 03:38:17 -0000 1.46 --- build.xml 21 Sep 2003 18:20:55 -0000 1.47 *************** *** 306,309 **** --- 306,326 ---- </target> + <!-- Create the Thumbelina jar --> + <target name="thumbelina" depends="jarlexer" description="create thumbelina.jar"> + <javac compiler="javac1.4" srcdir="${src}" debug="on" classpath="src:${dist}/lib/htmllexer.jar"> + <include name="org/htmlparser/lexerapplications/thumbelina/**/*.java"/> + </javac> + <jar jarfile="${dist}/lib/thumbelina.jar" + basedir="${src}" + defaultexcludes="no" + update="true"> + <include name="org/htmlparser/lexerapplications/thumbelina/**/*.class"/> + <zipfileset src="${dist}/lib/htmllexer.jar" includes="**/*.class"/> + <manifest> + <attribute name="Main-Class" value="org.htmlparser.lexerapplications.thumbelina.Thumbelina"/> + </manifest> + </jar> + </target> + <!-- Run the unit tests --> <target name="test" depends="jar" description="run the JUnit tests"> |
From: <der...@us...> - 2003-09-21 18:08:46
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina In directory sc8-pr-cvs1:/tmp/cvs-serv14237/thumbelina Log Message: Directory /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/thumbelina added to the repository |
From: <der...@us...> - 2003-09-21 18:08:03
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv14046 Added Files: java.header Log Message: Re-add java.header to get the keyword substitution turned off. |
From: <der...@us...> - 2003-09-21 18:07:01
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv13755 Removed Files: java.header Log Message: Re-add java.header to get the keyword substitution turned off. --- java.header DELETED --- |
From: <der...@us...> - 2003-09-21 18:05:53
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv13591 Added Files: java.header Log Message: Re-add java.header to get the keyword substitution turned off. |
From: <der...@us...> - 2003-09-21 18:04:14
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv13280 Removed Files: java.header Log Message: Re-add java.header to get the keyword substitution turned off. --- java.header DELETED --- |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests/tagTests Modified Files: AllTests.java AppletTagTest.java BaseHrefTagTest.java BodyTagTest.java CompositeTagTest.java DoctypeTagTest.java EndTagTest.java FormTagTest.java FrameSetTagTest.java FrameTagTest.java ImageTagTest.java InputTagTest.java JspTagTest.java LinkTagTest.java MetaTagTest.java ObjectCollectionTest.java OptionTagTest.java ScriptTagTest.java SelectTagTest.java StyleTagTest.java TagTest.java TextareaTagTest.java TitleTagTest.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/AllTests.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** AllTests.java 8 Sep 2003 02:26:31 -0000 1.42 --- AllTests.java 10 Sep 2003 03:38:24 -0000 1.43 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 31,35 **** import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { public AllTests(String name) { --- 31,35 ---- import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { public AllTests(String name) { *************** *** 42,46 **** suite.addTestSuite(ScriptTagTest.class); suite.addTestSuite(ImageTagTest.class); ! 
suite.addTestSuite(LinkTagTest.class); suite.addTestSuite(TagTest.class); suite.addTestSuite(TitleTagTest.class); --- 42,46 ---- suite.addTestSuite(ScriptTagTest.class); suite.addTestSuite(ImageTagTest.class); ! suite.addTestSuite(LinkTagTest.class); suite.addTestSuite(TagTest.class); suite.addTestSuite(TitleTagTest.class); *************** *** 51,55 **** suite.addTestSuite(AppletTagTest.class); suite.addTestSuite(FrameTagTest.class); ! suite.addTestSuite(FrameSetTagTest.class); suite.addTestSuite(InputTagTest.class); suite.addTestSuite(OptionTagTest.class); --- 51,55 ---- suite.addTestSuite(AppletTagTest.class); suite.addTestSuite(FrameTagTest.class); ! suite.addTestSuite(FrameSetTagTest.class); suite.addTestSuite(InputTagTest.class); suite.addTestSuite(OptionTagTest.class); *************** *** 61,65 **** suite.addTestSuite(BodyTagTest.class); suite.addTestSuite(CompositeTagTest.class); ! return suite; } --- 61,65 ---- suite.addTestSuite(BodyTagTest.class); suite.addTestSuite(CompositeTagTest.class); ! return suite; } Index: AppletTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/AppletTagTest.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** AppletTagTest.java 8 Sep 2003 02:26:31 -0000 1.26 --- AppletTagTest.java 10 Sep 2003 03:38:24 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 59,69 **** // Check the data in the applet tag AppletTag appletTag = (AppletTag)node[0]; ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } --- 59,69 ---- // Check the data in the applet tag AppletTag appletTag = (AppletTag)node[0]; ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } *************** *** 87,99 **** appletTag.setCodeBase ("htmlparser.sourceforge.net"); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"htmlparser.sourceforge.net\" ARCHIVE=\"test.jar\">\r\n"+ ! 
"<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } ! public void testChangeArchive() throws ParserException { String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; --- 87,99 ---- appletTag.setCodeBase ("htmlparser.sourceforge.net"); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"htmlparser.sourceforge.net\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } ! public void testChangeArchive() throws ParserException { String [][]paramsData = {{"Param1","Value1"},{"Name","Somik"},{"Age","23"}}; *************** *** 115,125 **** appletTag.setArchive ("htmlparser.jar"); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"htmlparser.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } --- 115,125 ---- appletTag.setArchive ("htmlparser.jar"); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"htmlparser.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } *************** *** 143,153 **** appletTag.setAppletClass ("MyOtherClass.class"); // Check the data in the applet tag ! 
String expectedRawString = "<APPLET CODE=\"MyOtherClass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } --- 143,153 ---- appletTag.setAppletClass ("MyOtherClass.class"); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"MyOtherClass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ ! "<PARAM VALUE=\"Value1\" NAME=\"Param1\">\r\n"+ "<PARAM VALUE=\"Somik\" NAME=\"Name\">\r\n"+ "<PARAM VALUE=\"23\" NAME=\"Age\">\r\n"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } *************** *** 177,187 **** appletTag.setAppletParams (paramsMap); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ "<PARAM VALUE=\"Two\" NAME=\"Second\">"+ ! "<PARAM VALUE=\"One\" NAME=\"First\">"+ "<PARAM VALUE=\"3\" NAME=\"Third\">"+ "</APPLET>"; ! assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } } --- 177,187 ---- appletTag.setAppletParams (paramsMap); // Check the data in the applet tag ! String expectedRawString = "<APPLET CODE=\"Myclass.class\" CODEBASE=\"www.kizna.com\" ARCHIVE=\"test.jar\">\r\n"+ "<PARAM VALUE=\"Two\" NAME=\"Second\">"+ ! "<PARAM VALUE=\"One\" NAME=\"First\">"+ "<PARAM VALUE=\"3\" NAME=\"Third\">"+ "</APPLET>"; ! 
assertStringEquals("toHTML()",expectedRawString,appletTag.toHtml()); } } Index: BaseHrefTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BaseHrefTagTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** BaseHrefTagTest.java 8 Sep 2003 02:26:31 -0000 1.25 --- BaseHrefTagTest.java 10 Sep 2003 03:38:24 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 39,48 **** super(name); } ! public void testConstruction() { BaseHrefTag baseRefTag = new BaseHrefTag(new TagData(0,0,"",""),"http://www.abc.com"); assertEquals("Expected Base URL","http://www.abc.com",baseRefTag.getBaseUrl()); } ! 
public void testNotHREFBaseTag() throws ParserException { createParser("<base target=\"_top\">"); --- 39,48 ---- super(name); } ! public void testConstruction() { BaseHrefTag baseRefTag = new BaseHrefTag(new TagData(0,0,"",""),"http://www.abc.com"); assertEquals("Expected Base URL","http://www.abc.com",baseRefTag.getBaseUrl()); } ! public void testNotHREFBaseTag() throws ParserException { createParser("<base target=\"_top\">"); Index: BodyTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/BodyTagTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BodyTagTest.java 8 Sep 2003 02:26:31 -0000 1.11 --- BodyTagTest.java 10 Sep 2003 03:38:24 -0000 1.12 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 45,49 **** super(name); } ! protected void setUp() throws Exception { super.setUp(); --- 45,49 ---- super(name); } ! protected void setUp() throws Exception { super.setUp(); *************** *** 55,72 **** bodyTag = (BodyTag) node[4]; } ! public void testToPlainTextString() throws ParserException { // check the label node ! assertEquals("Body","Yahoo!",bodyTag.toPlainTextString()); } public void testToHTML() throws ParserException { ! assertStringEquals("Raw String","<BODY>Yahoo!</BODY>",bodyTag.toHtml()); } public void testToString() throws ParserException { ! assertEquals("Body","BODY: Yahoo!",bodyTag.toString()); } ! public void testAttributes () { --- 55,72 ---- bodyTag = (BodyTag) node[4]; } ! public void testToPlainTextString() throws ParserException { // check the label node ! assertEquals("Body","Yahoo!",bodyTag.toPlainTextString()); } public void testToHTML() throws ParserException { ! assertStringEquals("Raw String","<BODY>Yahoo!</BODY>",bodyTag.toHtml()); } public void testToString() throws ParserException { ! assertEquals("Body","BODY: Yahoo!",bodyTag.toString()); } ! public void testAttributes () { *************** *** 74,78 **** Node node; Hashtable attributes; ! try { --- 74,78 ---- Node node; Hashtable attributes; ! try { *************** *** 101,111 **** } } ! ! public static TestSuite suite() { return new TestSuite(BodyTagTest.class); } ! ! public static void main(String[] args) { new junit.awtui.TestRunner().start(new String[] {BodyTagTest.class.getName()}); --- 101,111 ---- } } ! ! public static TestSuite suite() { return new TestSuite(BodyTagTest.class); } ! ! 
public static void main(String[] args) { new junit.awtui.TestRunner().start(new String[] {BodyTagTest.class.getName()}); Index: CompositeTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/CompositeTagTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** CompositeTagTest.java 8 Sep 2003 02:26:31 -0000 1.7 --- CompositeTagTest.java 10 Sep 2003 03:38:24 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 56,62 **** parseAndAssertNodeCount(1); TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = tableTag.digupStringNode("Hello World"); ! 
assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); --- 56,62 ---- parseAndAssertNodeCount(1); TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = tableTag.digupStringNode("Hello World"); ! assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); *************** *** 71,75 **** assertSame("should be original table",tableTag,parent); } ! public void testFindPositionOf() throws ParserException { createParser( --- 71,75 ---- assertSame("should be original table",tableTag,parent); } ! public void testFindPositionOf() throws ParserException { createParser( *************** *** 88,94 **** parseAndAssertNodeCount(1); TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = tableTag.digupStringNode("Hello World"); ! assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); --- 88,94 ---- parseAndAssertNodeCount(1); TableTag tableTag = (TableTag)node[0]; ! StringNode [] stringNode = tableTag.digupStringNode("Hello World"); ! assertEquals("number of string nodes",1,stringNode.length); assertNotNull("should have found string node",stringNode); *************** *** 96,99 **** int pos = parent.findPositionOf(stringNode[0]); assertEquals("position",5,pos); ! } } --- 96,99 ---- int pos = parent.findPositionOf(stringNode[0]); assertEquals("position",5,pos); ! } } Index: DoctypeTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/DoctypeTagTest.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** DoctypeTagTest.java 8 Sep 2003 02:26:31 -0000 1.26 --- DoctypeTagTest.java 10 Sep 2003 03:38:24 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: EndTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/EndTagTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** EndTagTest.java 8 Sep 2003 02:26:31 -0000 1.27 --- EndTagTest.java 10 Sep 2003 03:38:24 -0000 1.28 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** createParser("<HTML></HTML>"); // Register the image scanner ! parser.registerScanners(); parseAndAssertNodeCount(2); // The node should be an HTMLLinkTag --- 43,47 ---- createParser("<HTML></HTML>"); // Register the image scanner ! parser.registerScanners(); parseAndAssertNodeCount(2); // The node should be an HTMLLinkTag *************** *** 50,56 **** assertEquals("Raw String","</HTML>",endTag.toHtml()); } ! public void testEndTagFind() { ! String testHtml = "<SCRIPT>document.write(d+\".com\")</SCRIPT>"; int pos = testHtml.indexOf("</SCRIPT>"); --- 50,56 ---- assertEquals("Raw String","</HTML>",endTag.toHtml()); } ! public void testEndTagFind() { ! 
String testHtml = "<SCRIPT>document.write(d+\".com\")</SCRIPT>"; int pos = testHtml.indexOf("</SCRIPT>"); Index: FormTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FormTagTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** FormTagTest.java 8 Sep 2003 02:26:31 -0000 1.30 --- FormTagTest.java 10 Sep 2003 03:38:24 -0000 1.31 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 55,64 **** formTag.setFormLocation("http://www.yahoo.com/yahoo/do_not_login.jsp"); ! 
String expectedHTML = "<FORM ACTION=\"http://www.yahoo.com/yahoo/do_not_login.jsp\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"+ FormScannerTest.EXPECTED_FORM_HTML_REST_OF_FORM; assertStringEquals("Raw String",expectedHTML,formTag.toHtml()); } ! public void testToPlainTextString() throws ParserException { createParser(FormScannerTest.FORM_HTML); --- 55,64 ---- formTag.setFormLocation("http://www.yahoo.com/yahoo/do_not_login.jsp"); ! String expectedHTML = "<FORM ACTION=\"http://www.yahoo.com/yahoo/do_not_login.jsp\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"+ FormScannerTest.EXPECTED_FORM_HTML_REST_OF_FORM; assertStringEquals("Raw String",expectedHTML,formTag.toHtml()); } ! public void testToPlainTextString() throws ParserException { createParser(FormScannerTest.FORM_HTML); *************** *** 70,74 **** assertStringEquals("Form Tag string representation"," User NamePassword Contents of TextArea",formTag.toPlainTextString()); } ! public void testSearchFor() throws ParserException { createParser(FormScannerTest.FORM_HTML); --- 70,74 ---- assertStringEquals("Form Tag string representation"," User NamePassword Contents of TextArea",formTag.toPlainTextString()); } ! public void testSearchFor() throws ParserException { createParser(FormScannerTest.FORM_HTML); *************** *** 80,86 **** NodeList nodeList = formTag.searchFor("USER NAME"); assertEquals("Should have found nodes",1,nodeList.size()); ! Node[] nodes = nodeList.toNodeArray(); ! assertEquals("Number of nodes found",1,nodes.length); assertType("search result node",StringNode.class,nodes[0]); --- 80,86 ---- NodeList nodeList = formTag.searchFor("USER NAME"); assertEquals("Should have found nodes",1,nodeList.size()); ! Node[] nodes = nodeList.toNodeArray(); ! 
assertEquals("Number of nodes found",1,nodes.length); assertType("search result node",StringNode.class,nodes[0]); *************** *** 98,107 **** NodeList nodeList = formTag.searchFor("USER NAME",true); assertEquals("Should have not found nodes",0,nodeList.size()); ! nodeList = formTag.searchFor("User Name",true); assertNotNull("Should have not found nodes",nodeList); } ! public void testSearchByName() throws ParserException { createParser(FormScannerTest.FORM_HTML); --- 98,107 ---- NodeList nodeList = formTag.searchFor("USER NAME",true); assertEquals("Should have not found nodes",0,nodeList.size()); ! nodeList = formTag.searchFor("User Name",true); assertNotNull("Should have not found nodes",nodeList); } ! public void testSearchByName() throws ParserException { createParser(FormScannerTest.FORM_HTML); *************** *** 115,122 **** assertNotNull("Should have found the password node",tag); assertType("tag found",InputTag.class,tag); ! } ! /** ! * Bug 713907 reported by Dhaval Udani, erroneous * attributes being reported. */ --- 115,122 ---- assertNotNull("Should have found the password node",tag); assertType("tag found",InputTag.class,tag); ! } ! /** ! * Bug 713907 reported by Dhaval Udani, erroneous * attributes being reported. */ *************** *** 130,134 **** ); parser.registerScanners(); ! FormTag formTag = (FormTag)(parser.extractAllNodesThatAre( FormTag.class --- 130,134 ---- ); parser.registerScanners(); ! FormTag formTag = (FormTag)(parser.extractAllNodesThatAre( FormTag.class Index: FrameSetTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FrameSetTagTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** FrameSetTagTest.java 8 Sep 2003 02:26:31 -0000 1.27 --- FrameSetTagTest.java 10 Sep 2003 03:38:24 -0000 1.28 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** public void testToHTML() throws ParserException{ createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ --- 43,47 ---- public void testToHTML() throws ParserException{ createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ *************** *** 50,59 **** parser.addScanner(new FrameSetScanner("")); parser.addScanner(new FrameScanner("")); ! 
parseAndAssertNodeCount(1); assertTrue("Node 0 should be End Tag",node[0] instanceof FrameSetTag); FrameSetTag frameSetTag = (FrameSetTag)node[0]; assertStringEquals("HTML Contents", ! "<FRAMESET BORDER=\"0\" ROWS=\"115,*\" FRAMESPACING=\"0\" FRAMEBORDER=\"NO\">\r\n"+ "<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>\r\n"+ "<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">\r\n"+ --- 50,59 ---- parser.addScanner(new FrameSetScanner("")); parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(1); assertTrue("Node 0 should be End Tag",node[0] instanceof FrameSetTag); FrameSetTag frameSetTag = (FrameSetTag)node[0]; assertStringEquals("HTML Contents", ! "<FRAMESET BORDER=\"0\" ROWS=\"115,*\" FRAMESPACING=\"0\" FRAMEBORDER=\"NO\">\r\n"+ "<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>\r\n"+ "<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">\r\n"+ Index: FrameTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/FrameTagTest.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** FrameTagTest.java 8 Sep 2003 02:26:31 -0000 1.27 --- FrameTagTest.java 10 Sep 2003 03:38:24 -0000 1.28 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,51 **** public void testToHTML() throws ParserException { createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ "</frameset>"); parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(4); assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); --- 42,51 ---- public void testToHTML() throws ParserException { createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ "</frameset>"); parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(4); assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); *************** *** 53,60 **** FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! 
assertStringEquals("Frame 1 toHTML()","<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>",frameTag1.toHtml()); ! assertStringEquals("Frame 2 toHTML()","<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">",frameTag2.toHtml()); } } --- 53,60 ---- FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; ! assertStringEquals("Frame 1 toHTML()","<FRAME SCROLLING=\"NO\" FRAMEBORDER=\"NO\" SRC=\"demo_bc_top.html\" NAME=\"topFrame\" NORESIZE>",frameTag1.toHtml()); ! assertStringEquals("Frame 2 toHTML()","<FRAME SCROLLING=\"AUTO\" SRC=\"http://www.kizna.com/web_e/\" NAME=\"mainFrame\">",frameTag2.toHtml()); } } Index: ImageTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/ImageTagTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** ImageTagTest.java 8 Sep 2003 02:26:31 -0000 1.29 --- ImageTagTest.java 10 Sep 2003 03:38:24 -0000 1.30 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 39,43 **** import org.htmlparser.util.SimpleNodeIterator; ! public class ImageTagTest extends ParserTestCase { public ImageTagTest(String name) { --- 39,43 ---- import org.htmlparser.util.SimpleNodeIterator; ! public class ImageTagTest extends ParserTestCase { public ImageTagTest(String name) { *************** *** 57,61 **** // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLImageTag --- 57,61 ---- // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLImageTag *************** *** 77,81 **** // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLImageTag --- 77,81 ---- // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLImageTag *************** *** 97,101 **** // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLImageTag --- 97,101 ---- // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLImageTag *************** *** 114,122 **** // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! 
parseAndAssertNodeCount(1); assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("Image incorrect","http://www.cj.com/abcd.jpg",imageTag.getImageURL()); } --- 114,122 ---- // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); assertTrue("Node should be a HTMLImageTag",node[0] instanceof ImageTag); ImageTag imageTag = (ImageTag)node[0]; ! assertEquals("Image incorrect","http://www.cj.com/abcd.jpg",imageTag.getImageURL()); } *************** *** 133,137 **** // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLLinkTag --- 133,137 ---- // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an HTMLLinkTag *************** *** 145,149 **** // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an ImageTag --- 145,149 ---- // Register the image scanner parser.addScanner(new ImageScanner("-i",new LinkProcessor())); ! parseAndAssertNodeCount(1); // The node should be an ImageTag *************** *** 155,159 **** assertEquals("Width","305",imageTag.getAttribute("width")); } ! /** * See bug #753003 <IMG> within <A> missed when followed by <MAP> --- 155,159 ---- assertEquals("Width","305",imageTag.getAttribute("width")); } ! /** * See bug #753003 <IMG> within <A> missed when followed by <MAP> *************** *** 171,175 **** return (ImageTag) subNode; } ! return null; } --- 171,175 ---- return (ImageTag) subNode; } ! return null; } *************** *** 193,197 **** createParser (html); parser.registerScanners (); ! 
parseAndAssertNodeCount (1); assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag); --- 193,197 ---- createParser (html); parser.registerScanners (); ! parseAndAssertNodeCount (1); assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag); *************** *** 218,222 **** assertTrue ("bad source", "http://i.cnn.net/cnn/images/1.gif".equals (img.getImageURL ())); } ! // see bug #778781 SRC-attribute suppression in IMG-tags // HTML before parse: --- 218,222 ---- assertTrue ("bad source", "http://i.cnn.net/cnn/images/1.gif".equals (img.getImageURL ())); } ! // see bug #778781 SRC-attribute suppression in IMG-tags // HTML before parse: Index: InputTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/InputTagTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** InputTagTest.java 8 Sep 2003 02:26:31 -0000 1.29 --- InputTagTest.java 10 Sep 2003 03:38:24 -0000 1.30 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,48 **** private String testHTML = new String("<INPUT type=\"text\" name=\"Google\">"); ! public InputTagTest(String name) { super(name); } ! protected void setUp() throws Exception { --- 40,48 ---- private String testHTML = new String("<INPUT type=\"text\" name=\"Google\">"); ! public InputTagTest(String name) { super(name); } ! protected void setUp() throws Exception { *************** *** 51,56 **** parser.addScanner(new InputTagScanner("-i")); } ! ! public void testToHTML() throws ParserException { parseAndAssertNodeCount(1); --- 51,56 ---- parser.addScanner(new InputTagScanner("-i")); } ! ! public void testToHTML() throws ParserException { parseAndAssertNodeCount(1); *************** *** 60,65 **** assertStringEquals ("HTML String","<INPUT NAME=\"Google\" TYPE=\"text\">",InputTag.toHtml()); } ! ! public void testToString() throws ParserException { parseAndAssertNodeCount(1); --- 60,65 ---- assertStringEquals ("HTML String","<INPUT NAME=\"Google\" TYPE=\"text\">",InputTag.toHtml()); } ! ! public void testToString() throws ParserException { parseAndAssertNodeCount(1); *************** *** 69,73 **** assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : Google\nTYPE : text\n",InputTag.toString()); } ! /** * Reproduction of bug report 663038 --- 69,73 ---- assertEquals("HTML Raw String","INPUT TAG\n--------\nNAME : Google\nTYPE : text\n",InputTag.toString()); } ! /** * Reproduction of bug report 663038 *************** *** 82,93 **** parseAndAssertNodeCount(1); ! 
assertTrue("Node 1 should be INPUT Tag", node[0] instanceof InputTag); InputTag InputTag; InputTag = (InputTag) node[0]; ! assertStringEquals("HTML String", "<INPUT CHECKED NAME=\"cbCheck\" TYPE=\"checkbox\">", InputTag.toHtml()); } ! } --- 82,93 ---- parseAndAssertNodeCount(1); ! assertTrue("Node 1 should be INPUT Tag", node[0] instanceof InputTag); InputTag InputTag; InputTag = (InputTag) node[0]; ! assertStringEquals("HTML String", "<INPUT CHECKED NAME=\"cbCheck\" TYPE=\"checkbox\">", InputTag.toHtml()); } ! } Index: JspTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/JspTagTest.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** JspTagTest.java 8 Sep 2003 02:26:31 -0000 1.29 --- JspTagTest.java 10 Sep 2003 03:38:24 -0000 1.30 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 72,80 **** " else \n"+ " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ "%>\n"); Parser.setLineSeparator("\r\n"); // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(5); // The first node should be an HTMLJspTag --- 72,80 ---- " else \n"+ " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ "%>\n"); Parser.setLineSeparator("\r\n"); // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(5); // The first node should be an HTMLJspTag *************** *** 82,86 **** JspTag tag = (JspTag)node[0]; assertStringEquals("Contents of the tag","@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" ",tag.getText()); ! // The second node should be a normal tag assertTrue("Node 2 should be an Tag",node[1] instanceof Tag); --- 82,86 ---- JspTag tag = (JspTag)node[0]; assertStringEquals("Contents of the tag","@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" ",tag.getText()); ! // The second node should be a normal tag assertTrue("Node 2 should be an Tag",node[1] instanceof Tag); *************** *** 101,107 **** " "; assertEquals("Contents of the tag",expected,tag2.getText()); ! } ! /** * Check if the JSP Tag is being correctly recognized. --- 101,107 ---- " "; assertEquals("Contents of the tag",expected,tag2.getText()); ! } ! /** * Check if the JSP Tag is being correctly recognized. *************** *** 134,142 **** " else \n"+ " if(transfer.validate(request))\n"+ ! " %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ "%>\n"); Parser.setLineSeparator("\r\n"); // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(5); // The first node should be an HTMLJspTag --- 134,142 ---- " else \n"+ " if(transfer.validate(request))\n"+ ! 
" %><jsp:forward page=\"transferConfirm.jsp\"/><%\n"+ "%>\n"); Parser.setLineSeparator("\r\n"); // Register the Jsp Scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(5); // The first node should be an HTMLJspTag *************** *** 144,149 **** JspTag tag = (JspTag)node[0]; assertEquals("Raw String of the first JSP tag","<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>",tag.toHtml()); ! ! // The third node should be an HTMLJspTag assertTrue("Node 2 should be an HTMLJspTag",node[2] instanceof JspTag); --- 144,149 ---- JspTag tag = (JspTag)node[0]; assertEquals("Raw String of the first JSP tag","<%@ taglib uri=\"/WEB-INF/struts.tld\" prefix=\"struts\" %>",tag.toHtml()); ! ! // The third node should be an HTMLJspTag assertTrue("Node 2 should be an HTMLJspTag",node[2] instanceof JspTag); *************** *** 161,181 **** assertTrue("Node 4 should be an HTMLJspTag",node[4] instanceof JspTag); JspTag tag4 = (JspTag)node[4]; ! expected = "<%\r\n"+ "%>"; assertEquals("Raw String of the fourth JSP tag",expected,tag4.toHtml()); ! } public void testSpecialCharacters() throws ParserException { ! StringBuffer sb1 = new StringBuffer(); ! sb1.append("<% for (i=0;i<j;i++);%>"); createParser(sb1.toString()); ! ! // Register the jsp scanner ! parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(1); //assertTrue("Node should be a jsp tag",node[1] instanceof HTMLJspTag); ! JspTag jspTag = (JspTag)node[0]; assertEquals("jsp toHTML()","<% for (i=0;i<j;i++);%>",jspTag.toHtml()); ! } --- 161,181 ---- assertTrue("Node 4 should be an HTMLJspTag",node[4] instanceof JspTag); JspTag tag4 = (JspTag)node[4]; ! expected = "<%\r\n"+ "%>"; assertEquals("Raw String of the fourth JSP tag",expected,tag4.toHtml()); ! } public void testSpecialCharacters() throws ParserException { ! StringBuffer sb1 = new StringBuffer(); ! sb1.append("<% for (i=0;i<j;i++);%>"); createParser(sb1.toString()); ! ! // Register the jsp scanner ! 
parser.addScanner(new JspScanner("-j")); parseAndAssertNodeCount(1); //assertTrue("Node should be a jsp tag",node[1] instanceof HTMLJspTag); ! JspTag jspTag = (JspTag)node[0]; assertEquals("jsp toHTML()","<% for (i=0;i<j;i++);%>",jspTag.toHtml()); ! } Index: LinkTagTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/LinkTagTest.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** LinkTagTest.java 8 Sep 2003 02:26:31 -0000 1.32 --- LinkTagTest.java 10 Sep 2003 03:38:24 -0000 1.33 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 56,60 **** // Register the image scanner parser.addScanner(new LinkScanner("-l")); ! 
parseAndAssertNodeCount(1); // The node should be an HTMLLinkTag --- 56,60 ---- // Register the image scanner parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); // The node should be an HTMLLinkTag *************** *** 76,80 **** // Register the image scanner parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); // The node should be an HTMLLinkTag --- 76,80 ---- // Register the image scanner parser.addScanner(new LinkScanner("-l")); ! parseAndAssertNodeCount(1); // The node should be an HTMLLinkTag *************** *** 83,87 **** assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink()); } ! /** * The bug ... [truncated message content] |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests/parserHelperTests Modified Files: AllTests.java CompositeTagScannerHelperTest.java RemarkNodeParserTest.java StringParserTest.java Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/AllTests.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** AllTests.java 8 Sep 2003 02:26:30 -0000 1.26 --- AllTests.java 10 Sep 2003 03:38:24 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 30,34 **** import junit.framework.*; ! public class AllTests extends junit.framework.TestCase { --- 30,34 ---- import junit.framework.*; ! public class AllTests extends junit.framework.TestCase { Index: CompositeTagScannerHelperTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/CompositeTagScannerHelperTest.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** CompositeTagScannerHelperTest.java 8 Sep 2003 02:26:31 -0000 1.19 --- CompositeTagScannerHelperTest.java 10 Sep 2003 03:38:24 -0000 1.20 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! 
// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 47,55 **** protected void setUp() { ! helper = new CompositeTagScannerHelper(null,null,null,null,null,false); } ! ! public void testIsXmlEndTagForRealXml() { Tag tag = new Tag( new TagData( --- 47,55 ---- protected void setUp() { ! helper = new CompositeTagScannerHelper(null,null,null,null,null,false); } ! ! public void testIsXmlEndTagForRealXml() { Tag tag = new Tag( new TagData( *************** *** 60,70 **** } ! public void testIsXmlEndTagForFalseMatches() { Tag tag = new Tag( new TagData( 0,0,"a href=http://someurl.com/","" ) ! ); ! assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); } } --- 60,70 ---- } ! public void testIsXmlEndTagForFalseMatches() { Tag tag = new Tag( new TagData( 0,0,"a href=http://someurl.com/","" ) ! ); ! assertFalse("should not be an xml end tag",helper.isXmlEndTag(tag)); } } Index: RemarkNodeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** RemarkNodeParserTest.java 8 Sep 2003 02:26:31 -0000 1.34 --- RemarkNodeParserTest.java 10 Sep 2003 03:38:24 -0000 1.35 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,46 **** import org.htmlparser.util.ParserException; ! public class RemarkNodeParserTest extends ParserTestCase { public RemarkNodeParserTest(String name) { super(name); } ! /** * The bug being reproduced is this : <BR> --- 37,46 ---- import org.htmlparser.util.ParserException; ! public class RemarkNodeParserTest extends ParserTestCase { public RemarkNodeParserTest(String name) { super(name); } ! /** * The bug being reproduced is this : <BR> *************** *** 54,60 **** * <TEST> * </TEST> ! * * The above line is incorrectly parsed - the remark is not correctly identified. ! * This bug was reported by Serge Kruppa (2002-Feb-08). */ public void testRemarkNodeBug() throws ParserException --- 54,60 ---- * <TEST> * </TEST> ! * * The above line is incorrectly parsed - the remark is not correctly identified. ! * This bug was reported by Serge Kruppa (2002-Feb-08). */ public void testRemarkNodeBug() throws ParserException *************** *** 75,80 **** assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! 
// The sixth node should be a HTMLRemarkNode assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); remarkNode = (RemarkNode)node[5]; --- 75,80 ---- assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.getText()); ! // The sixth node should be a HTMLRemarkNode assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); remarkNode = (RemarkNode)node[5]; *************** *** 98,107 **** assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! // The sixth node should be a HTMLRemarkNode assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! } --- 98,107 ---- assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertEquals("Plain Text of the remarkNode #1"," saved from url=(0022)http://internet.e-mail ",remarkNode.toPlainTextString()); ! // The sixth node should be a HTMLRemarkNode assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); remarkNode = (RemarkNode)node[5]; ! assertEquals("Plain Text of the remarkNode #6","\r\n Whats gonna happen now ?\r\n",remarkNode.getText()); ! } *************** *** 122,132 **** assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! 
// The sixth node should be a HTMLRemarkNode assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); } ! public void testNonRemarkNode() throws ParserException { createParser(" <![endif]>"); --- 122,132 ---- assertTrue("First node should be a HTMLRemarkNode",node[0] instanceof RemarkNode); RemarkNode remarkNode = (RemarkNode)node[0]; ! assertStringEquals("Raw String of the remarkNode #1","<!-- saved from url=(0022)http://internet.e-mail -->",remarkNode.toHtml()); ! // The sixth node should be a HTMLRemarkNode assertTrue("Sixth node should be a HTMLRemarkNode",node[5] instanceof RemarkNode); remarkNode = (RemarkNode)node[5]; ! assertStringEquals("Raw String of the remarkNode #6","<!--\r\n Whats gonna happen now ?\r\n-->",remarkNode.toHtml()); } ! public void testNonRemarkNode() throws ParserException { createParser(" <![endif]>"); *************** *** 139,145 **** assertEquals("Text contents"," ",stringNode.getText()); assertEquals("Tag Contents","![endif]",tag.getText()); ! } ! /** * This is the simulation of bug report 586756, submitted --- 139,145 ---- assertEquals("Text contents"," ",stringNode.getText()); assertEquals("Tag Contents","![endif]",tag.getText()); ! } ! /** * This is the simulation of bug report 586756, submitted *************** *** 147,151 **** * If all the comment contains is a blank line, it breaks * the state ! */ public void testRemarkNodeWithBlankLine() throws ParserException { createParser("<!--\n"+ --- 147,151 ---- * If all the comment contains is a blank line, it breaks * the state ! */ public void testRemarkNodeWithBlankLine() throws ParserException { createParser("<!--\n"+ *************** *** 157,168 **** RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Expected contents","\r\n",remarkNode.getText()); ! } ! 
/** * This is the simulation of a bug report submitted * by Claude Duguay. * If it is a comment with nothing in it, parser crashes ! */ public void testRemarkNodeWithNothing() throws ParserException { createParser("<!-->"); --- 157,168 ---- RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Expected contents","\r\n",remarkNode.getText()); ! } ! /** * This is the simulation of a bug report submitted * by Claude Duguay. * If it is a comment with nothing in it, parser crashes ! */ public void testRemarkNodeWithNothing() throws ParserException { createParser("<!-->"); *************** *** 171,177 **** RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Expected contents","",remarkNode.getText()); ! ! } ! /** * Reproduction of bug reported by John Zook [594301] --- 171,177 ---- RemarkNode remarkNode = (RemarkNode)node[0]; assertEquals("Expected contents","",remarkNode.getText()); ! ! } ! /** * Reproduction of bug reported by John Zook [594301] *************** *** 179,183 **** * <!-- <A> --> * it doesent get parsed correctly ! */ public void testTagWithinRemarkNode() throws ParserException { createParser("<!-- \n"+ --- 179,183 ---- * <!-- <A> --> * it doesent get parsed correctly ! */ public void testTagWithinRemarkNode() throws ParserException { createParser("<!-- \n"+ *************** *** 189,195 **** RemarkNode remarkNode = (RemarkNode)node[0]; assertStringEquals("Expected contents"," \n<A>\nbcd ",remarkNode.getText()); ! } ! /** * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. --- 189,195 ---- RemarkNode remarkNode = (RemarkNode)node[0]; assertStringEquals("Expected contents"," \n<A>\nbcd ",remarkNode.getText()); ! } ! /** * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. *************** *** 215,219 **** Parser.setLineSeparator("\r\n"); } ! /** * Bug reported by John Zook [594301] --- 215,219 ---- Parser.setLineSeparator("\r\n"); } ! 
/** * Bug reported by John Zook [594301] *************** *** 273,277 **** assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } ! /** * Test a comment declaration with two comments. --- 273,277 ---- assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",remarkNode.getText()); } ! /** * Test a comment declaration with two comments. *************** *** 366,369 **** parseAndAssertNodeCount (10); } ! } --- 366,369 ---- parseAndAssertNodeCount (10); } ! } Index: StringParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/StringParserTest.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** StringParserTest.java 8 Sep 2003 02:26:31 -0000 1.36 --- StringParserTest.java 10 Sep 2003 03:38:24 -0000 1.37 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** super(name); } ! /** * The bug being reproduced is this : <BR> --- 43,47 ---- super(name); } ! /** * The bug being reproduced is this : <BR> *************** *** 60,64 **** assertEquals("Text of the StringNode","Google",stringNode.getText()); } ! /** * Bug reported by Kaarle Kaila of Nokia<br> --- 60,64 ---- assertEquals("Text of the StringNode","Google",stringNode.getText()); } ! /** * Bug reported by Kaarle Kaila of Nokia<br> *************** *** 71,75 **** public void testStringNodeBug2() throws ParserException { // Register the link scanner ! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+ "Acrobat Reader</A> installed on your computer."); --- 71,75 ---- public void testStringNodeBug2() throws ParserException { // Register the link scanner ! createParser("view these documents, you must have <A href='http://www.adobe.com'>Adobe \n"+ "Acrobat Reader</A> installed on your computer."); *************** *** 77,81 **** parser.addScanner(new LinkScanner("-l")); parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; --- 77,81 ---- parser.addScanner(new LinkScanner("-l")); parseAndAssertNodeCount(3); ! // The first node should be a HTMLStringNode- with the text - view these documents, you must have assertTrue("First node should be a HTMLStringNode",node[0] instanceof StringNode); StringNode stringNode = (StringNode)node[0]; *************** *** 85,94 **** assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText()); ! 
assertTrue("Third node should be a string node",node[2] instanceof StringNode); StringNode stringNode2 = (StringNode)node[2]; assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); } ! /** * Bug reported by Roger Sollberger<br> --- 85,94 ---- assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); assertEquals("Link text is","Adobe \r\nAcrobat Reader",linkNode.getLinkText()); ! assertTrue("Third node should be a string node",node[2] instanceof StringNode); StringNode stringNode2 = (StringNode)node[2]; assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); } ! /** * Bug reported by Roger Sollberger<br> *************** *** 106,110 **** assertEquals("http://asgard.ch",linkTag.getLink()); } ! public void testToPlainTextString() throws ParserException { createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); --- 106,110 ---- assertEquals("http://asgard.ch",linkTag.getLink()); } ! public void testToPlainTextString() throws ParserException { createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); *************** *** 117,121 **** assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); } ! public void testToHTML() throws ParserException { createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); --- 117,121 ---- assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); } ! 
public void testToHTML() throws ParserException { createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); *************** *** 157,161 **** RemarkNode remarkNode = (RemarkNode)node[1]; assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); ! } --- 157,161 ---- RemarkNode remarkNode = (RemarkNode)node[1]; assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); ! } *************** *** 171,175 **** assertEquals("First String node contents","a",stringNode.getText()); } ! public void testStringWithEmptyLine() throws ParserException { createParser("a\n\nb"); --- 171,175 ---- assertEquals("First String node contents","a",stringNode.getText()); } ! public void testStringWithEmptyLine() throws ParserException { createParser("a\n\nb"); *************** *** 178,183 **** StringNode stringNode = (StringNode)node[0]; assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); ! } ! /** * An attempt to reproduce bug 677176, which passes. --- 178,183 ---- StringNode stringNode = (StringNode)node[0]; assertStringEquals("First String node contents","a\r\n\r\nb",stringNode.getText()); ! } ! /** * An attempt to reproduce bug 677176, which passes. *************** *** 203,213 **** "<body>" + "</body>" + ! "</html>" ! ); parser.registerScanners(); parseAndAssertNodeCount(10); assertType("fourth node",MetaTag.class,node[4]); MetaTag metaTag = (MetaTag)node[4]; ! assertStringEquals( "content", --- 203,213 ---- "<body>" + "</body>" + ! "</html>" ! ); parser.registerScanners(); parseAndAssertNodeCount(10); assertType("fourth node",MetaTag.class,node[4]); MetaTag metaTag = (MetaTag)node[4]; ! assertStringEquals( "content", *************** *** 216,220 **** ); } ! public void testStringWithLineBreaks() throws Exception { createParser("Testing &\nRefactoring"); --- 216,220 ---- ); } ! 
public void testStringWithLineBreaks() throws Exception { createParser("Testing &\nRefactoring"); *************** *** 224,227 **** assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); } ! } --- 224,227 ---- assertStringEquals("text","Testing &\r\nRefactoring",stringNode.toPlainTextString()); } ! } |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests/scannersTests Modified Files: AllTests.java AppletScannerTest.java BaseHREFScannerTest.java BodyScannerTest.java BulletListScannerTest.java BulletScannerTest.java CompositeTagScannerTest.java DivScannerTest.java FormScannerTest.java FrameScannerTest.java FrameSetScannerTest.java HeadScannerTest.java HtmlTest.java ImageScannerTest.java InputTagScannerTest.java JspScannerTest.java LabelScannerTest.java LinkScannerTest.java MetaTagScannerTest.java OptionTagScannerTest.java ScriptScannerTest.java SelectTagScannerTest.java SpanScannerTest.java StyleScannerTest.java TableScannerTest.java TagScannerTest.java TextareaTagScannerTest.java TitleScannerTest.java XmlEndTagScanningTest.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g.
X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/AllTests.java,v retrieving revision 1.47 retrieving revision 1.48 diff -C2 -d -r1.47 -r1.48 *** AllTests.java 8 Sep 2003 02:26:31 -0000 1.47 --- AllTests.java 10 Sep 2003 03:38:24 -0000 1.48 *************** *** 18,22 **** // For any questions or suggestions, you can write to me at : // Email :so...@ki... ! // // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha --- 18,22 ---- // For any questions or suggestions, you can write to me at : // Email :so...@ki... ! // // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha *************** *** 31,35 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 31,35 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 38,47 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 38,47 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 51,55 **** import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { --- 51,55 ---- import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { *************** *** 57,61 **** super(name); } ! public static TestSuite suite() { TestSuite suite = new TestSuite("Scanner Tests"); --- 57,61 ---- super(name); } ! public static TestSuite suite() { TestSuite suite = new TestSuite("Scanner Tests"); *************** *** 65,73 **** suite.addTestSuite(ImageScannerTest.class); suite.addTestSuite(LinkScannerTest.class); ! suite.addTestSuite(StyleScannerTest.class); ! suite.addTestSuite(MetaTagScannerTest.class); ! suite.addTestSuite(TitleScannerTest.class); ! suite.addTestSuite(FormScannerTest.class); ! suite.addTestSuite(FrameScannerTest.class); suite.addTestSuite(FrameSetScannerTest.class); suite.addTestSuite(InputTagScannerTest.class); --- 65,73 ---- suite.addTestSuite(ImageScannerTest.class); suite.addTestSuite(LinkScannerTest.class); ! suite.addTestSuite(StyleScannerTest.class); ! suite.addTestSuite(MetaTagScannerTest.class); ! suite.addTestSuite(TitleScannerTest.class); ! suite.addTestSuite(FormScannerTest.class); ! suite.addTestSuite(FrameScannerTest.class); suite.addTestSuite(FrameSetScannerTest.class); suite.addTestSuite(InputTagScannerTest.class); *************** *** 76,82 **** suite.addTestSuite(TextareaTagScannerTest.class); suite.addTestSuite(BaseHREFScannerTest.class); ! suite.addTestSuite(JspScannerTest.class); ! suite.addTestSuite(TableScannerTest.class); ! 
suite.addTestSuite(SpanScannerTest.class); suite.addTestSuite(DivScannerTest.class); suite.addTestSuite(LabelScannerTest.class); --- 76,82 ---- suite.addTestSuite(TextareaTagScannerTest.class); suite.addTestSuite(BaseHREFScannerTest.class); ! suite.addTestSuite(JspScannerTest.class); ! suite.addTestSuite(TableScannerTest.class); ! suite.addTestSuite(SpanScannerTest.class); suite.addTestSuite(DivScannerTest.class); suite.addTestSuite(LabelScannerTest.class); *************** *** 87,91 **** suite.addTestSuite(BulletScannerTest.class); suite.addTestSuite(HtmlTest.class); ! suite.addTestSuite(XmlEndTagScanningTest.class); return suite; } --- 87,91 ---- suite.addTestSuite(BulletScannerTest.class); suite.addTestSuite(HtmlTest.class); ! suite.addTestSuite(XmlEndTagScanningTest.class); return suite; } Index: AppletScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/AppletScannerTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** AppletScannerTest.java 8 Sep 2003 02:26:31 -0000 1.25 --- AppletScannerTest.java 10 Sep 2003 03:38:24 -0000 1.26 *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,47 **** super(name); } ! ! public void testEvaluate() { AppletScanner scanner = new AppletScanner("-a"); --- 42,47 ---- super(name); } ! ! 
public void testEvaluate() { AppletScanner scanner = new AppletScanner("-a"); *************** *** 64,71 **** "</HTML>"; createParser(testHTML); ! // Register the applet scanner parser.addScanner(new AppletScanner("-a")); ! parseAndAssertNodeCount(2); assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); --- 64,71 ---- "</HTML>"; createParser(testHTML); ! // Register the applet scanner parser.addScanner(new AppletScanner("-a")); ! parseAndAssertNodeCount(2); assertTrue("Node should be an applet tag",node[0] instanceof AppletTag); Index: BaseHREFScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BaseHREFScannerTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** BaseHREFScannerTest.java 8 Sep 2003 02:26:31 -0000 1.25 --- BaseHREFScannerTest.java 10 Sep 2003 03:38:24 -0000 1.26 *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 44,48 **** super(arg0); } ! protected void setUp() { scanner=new BaseHrefScanner(); --- 44,48 ---- super(arg0); } ! protected void setUp() { scanner=new BaseHrefScanner(); *************** *** 57,68 **** assertEquals("Url2","http://www.google.com",modifiedUrl2); } ! 
public void testEvaluate() { String testData1 = "BASE HREF=\"http://www.abc.com/\""; assertTrue("Data 1 Should have evaluated true",scanner.evaluate(testData1,null)); String testData2 = "Base href=\"http://www.abc.com/\""; ! assertTrue("Data 2 Should have evaluated true",scanner.evaluate(testData2,null)); } ! public void testScan() throws ParserException{ createParser("<html><head><TITLE>test page</TITLE><BASE HREF=\"http://www.abc.com/\"><a href=\"home.cfm\">Home</a>...</html>","http://www.google.com/test/index.html"); --- 57,68 ---- assertEquals("Url2","http://www.google.com",modifiedUrl2); } ! public void testEvaluate() { String testData1 = "BASE HREF=\"http://www.abc.com/\""; assertTrue("Data 1 Should have evaluated true",scanner.evaluate(testData1,null)); String testData2 = "Base href=\"http://www.abc.com/\""; ! assertTrue("Data 2 Should have evaluated true",scanner.evaluate(testData2,null)); } ! public void testScan() throws ParserException{ createParser("<html><head><TITLE>test page</TITLE><BASE HREF=\"http://www.abc.com/\"><a href=\"home.cfm\">Home</a>...</html>","http://www.google.com/test/index.html"); *************** *** 75,80 **** assertTrue(node[3] instanceof BaseHrefTag); BaseHrefTag baseRefTag = (BaseHrefTag)node[3]; ! assertEquals("Base HREF Url","http://www.abc.com",baseRefTag.getBaseUrl()); } ! } --- 75,80 ---- assertTrue(node[3] instanceof BaseHrefTag); BaseHrefTag baseRefTag = (BaseHrefTag)node[3]; ! assertEquals("Base HREF Url","http://www.abc.com",baseRefTag.getBaseUrl()); } ! 
} Index: BodyScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BodyScannerTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BodyScannerTest.java 8 Sep 2003 02:26:31 -0000 1.11 --- BodyScannerTest.java 10 Sep 2003 03:38:24 -0000 1.12 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 56,60 **** assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! public void testBodywithJsp() throws ParserException { createParser("<html><head><title>Test 1</title></head><body><%=BodyValue%></body></html>"); --- 56,60 ---- assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! 
public void testBodywithJsp() throws ParserException { createParser("<html><head><title>Test 1</title></head><body><%=BodyValue%></body></html>"); *************** *** 69,73 **** assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! public void testBodyMixed() throws ParserException { createParser("<html><head><title>Test 1</title></head><body>before jsp<%=BodyValue%>after jsp</body></html>"); --- 69,73 ---- assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! public void testBodyMixed() throws ParserException { createParser("<html><head><title>Test 1</title></head><body>before jsp<%=BodyValue%>after jsp</body></html>"); *************** *** 82,86 **** assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! public void testBodyEnding() throws ParserException { createParser("<html><body>before jsp<%=BodyValue%>after jsp</html>"); --- 82,86 ---- assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! public void testBodyEnding() throws ParserException { createParser("<html><body>before jsp<%=BodyValue%>after jsp</html>"); *************** *** 95,109 **** assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! ! public static TestSuite suite() { return new TestSuite(BodyScannerTest.class); } ! ! public static void main(String[] args) { new junit.awtui.TestRunner().start(new String[] {BodyScannerTest.class.getName()}); } ! } --- 95,109 ---- assertEquals("Body Scanner",bodyScanner,bodyTag.getThisScanner()); } ! ! public static TestSuite suite() { return new TestSuite(BodyScannerTest.class); } ! ! public static void main(String[] args) { new junit.awtui.TestRunner().start(new String[] {BodyScannerTest.class.getName()}); } ! 
} Index: BulletListScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BulletListScannerTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** BulletListScannerTest.java 8 Sep 2003 02:26:31 -0000 1.7 --- BulletListScannerTest.java 10 Sep 2003 03:38:24 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 64,72 **** parser.registerScanners(); parseAndAssertNodeCount(1); ! ! NodeList nestedBulletLists = ((CompositeTag)node[0]).searchFor( BulletList.class ! ); assertEquals( "bullets in first list", --- 64,72 ---- parser.registerScanners(); parseAndAssertNodeCount(1); ! ! NodeList nestedBulletLists = ((CompositeTag)node[0]).searchFor( BulletList.class ! 
); assertEquals( "bullets in first list", *************** *** 76,80 **** BulletList firstList = (BulletList)nestedBulletLists.elementAt(0); ! Bullet firstBullet = (Bullet)firstList.childAt(0); Node firstNodeInFirstBullet = --- 76,80 ---- BulletList firstList = (BulletList)nestedBulletLists.elementAt(0); ! Bullet firstBullet = (Bullet)firstList.childAt(0); Node firstNodeInFirstBullet = Index: BulletScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/BulletScannerTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** BulletScannerTest.java 8 Sep 2003 02:26:31 -0000 1.8 --- BulletScannerTest.java 10 Sep 2003 03:38:24 -0000 1.9 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 51,56 **** assertType("should be a bullet",Bullet.class,node[0]); } ! ! public void testOutOfMemoryBug() throws ParserException { createParser( --- 51,56 ---- assertType("should be a bullet",Bullet.class,node[0]); } ! ! public void testOutOfMemoryBug() throws ParserException { createParser( *************** *** 71,75 **** "" + "</body>" + ! "</html>" ); parser.registerScanners(); --- 71,75 ---- "" + "</body>" + ! "</html>" ); parser.registerScanners(); *************** *** 78,83 **** System.out.println(node.toHtml()); } ! } ! public void testNonEndedBullets() throws ParserException { createParser( --- 78,83 ---- System.out.println(node.toHtml()); } ! } ! public void testNonEndedBullets() throws ParserException { createParser( Index: CompositeTagScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/CompositeTagScannerTest.java,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** CompositeTagScannerTest.java 8 Sep 2003 02:26:31 -0000 1.37 --- CompositeTagScannerTest.java 10 Sep 2003 03:38:24 -0000 1.38 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** private CompositeTagScanner scanner; private String url; ! public CompositeTagScannerTest(String name) { super(name); --- 43,47 ---- private CompositeTagScanner scanner; private String url; ! public CompositeTagScannerTest(String name) { super(name); *************** *** 49,56 **** protected void setUp() { ! String [] arr = { "SOMETHING" }; ! scanner = new CompositeTagScanner(arr) { public Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException { --- 49,56 ---- protected void setUp() { ! String [] arr = { "SOMETHING" }; ! scanner = new CompositeTagScanner(arr) { public Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException { *************** *** 85,89 **** assertStringEquals("html","<CUSTOM/>",customTag.toHtml()); } ! public void testEmptyCompositeTagAnotherStyle() throws ParserException { createParser( --- 85,89 ---- assertStringEquals("html","<CUSTOM/>",customTag.toHtml()); } ! public void testEmptyCompositeTagAnotherStyle() throws ParserException { createParser( *************** *** 113,117 **** assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! Node child = customTag.childAt(0); assertType("child",StringNode.class,child); --- 113,117 ---- assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! 
Node child = customTag.childAt(0); assertType("child",StringNode.class,child); *************** *** 158,166 **** assertEquals("another tag start pos",8,tag.elementBegin()); assertEquals("another tag ending pos",17,tag.elementEnd()); ! assertEquals("custom end tag start pos",18,customTag.getEndTag().elementBegin()); assertStringEquals("child html","<ANOTHER/>",child.toHtml()); } ! public void testParseTwoCompositeTags() throws ParserException { createParser( --- 158,166 ---- assertEquals("another tag start pos",8,tag.elementBegin()); assertEquals("another tag ending pos",17,tag.elementEnd()); ! assertEquals("custom end tag start pos",18,customTag.getEndTag().elementBegin()); assertStringEquals("child html","<ANOTHER/>",child.toHtml()); } ! public void testParseTwoCompositeTags() throws ParserException { createParser( *************** *** 174,178 **** assertType("tag 2",CustomTag.class,node[1]); } ! public void testXmlTypeCompositeTags() throws ParserException { createParser( --- 174,178 ---- assertType("tag 2",CustomTag.class,node[1]); } ! public void testXmlTypeCompositeTags() throws ParserException { createParser( *************** *** 193,198 **** node = customTag.childAt(1); assertType("second child",CustomTag.class,node); ! } ! public void testCompositeTagWithNestedTag() throws ParserException { createParser( --- 193,198 ---- node = customTag.childAt(1); assertType("second child",CustomTag.class,node); ! } ! public void testCompositeTagWithNestedTag() throws ParserException { createParser( *************** *** 250,254 **** assertEquals("text","Hello",text.toPlainTextString()); } ! public void testErroneousCompositeTag() throws ParserException { createParser("<custom>"); --- 250,254 ---- assertEquals("text","Hello",text.toPlainTextString()); } ! 
public void testErroneousCompositeTag() throws ParserException { createParser("<custom>"); *************** *** 260,264 **** assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM></CUSTOM>",customTag.toHtml()); } --- 260,264 ---- assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM></CUSTOM>",customTag.toHtml()); } *************** *** 276,282 **** assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM><FIRSTCHILD><SECONDCHILD></CUSTOM>",customTag.toHtml()); } ! public void testErroneousCompositeTagWithChildrenAndLineBreak() throws ParserException { createParser( --- 276,282 ---- assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM><FIRSTCHILD><SECONDCHILD></CUSTOM>",customTag.toHtml()); } ! public void testErroneousCompositeTagWithChildrenAndLineBreak() throws ParserException { createParser( *************** *** 298,304 **** "</CUSTOM>", customTag.toHtml() ! ); } ! public void testTwoConsecutiveErroneousCompositeTags() throws ParserException { createParser( --- 298,304 ---- "</CUSTOM>", customTag.toHtml() ! ); } ! public void testTwoConsecutiveErroneousCompositeTags() throws ParserException { createParser( *************** *** 320,324 **** "<CUSTOM>something</CUSTOM>", customTag.toHtml() ! ); customTag = (CustomTag)node[1]; assertStringEquals( --- 320,324 ---- "<CUSTOM>something</CUSTOM>", customTag.toHtml() ! ); customTag = (CustomTag)node[1]; assertStringEquals( *************** *** 328,332 **** ); } ! 
public void testCompositeTagWithErroneousAnotherTagAndLineBreak() throws ParserException { createParser( --- 328,332 ---- ); } ! public void testCompositeTagWithErroneousAnotherTagAndLineBreak() throws ParserException { createParser( *************** *** 340,344 **** AnotherTag anotherTag = (AnotherTag)node[0]; assertEquals("another tag child count",0,anotherTag.getChildCount()); ! CustomTag customTag = (CustomTag)node[1]; assertEquals("child count",0,customTag.getChildCount()); --- 340,344 ---- AnotherTag anotherTag = (AnotherTag)node[0]; assertEquals("another tag child count",0,anotherTag.getChildCount()); ! CustomTag customTag = (CustomTag)node[1]; assertEquals("child count",0,customTag.getChildCount()); *************** *** 349,353 **** assertEquals("ending line position",2,customTag.tagData.getEndLine()); assertStringEquals("another tag html","<ANOTHER></ANOTHER>",anotherTag.toHtml()); ! assertStringEquals("custom tag html","<CUSTOM>\r\n</CUSTOM>",customTag.toHtml()); } --- 349,353 ---- assertEquals("ending line position",2,customTag.tagData.getEndLine()); assertStringEquals("another tag html","<ANOTHER></ANOTHER>",anotherTag.toHtml()); ! assertStringEquals("custom tag html","<CUSTOM>\r\n</CUSTOM>",customTag.toHtml()); } *************** *** 368,372 **** assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM><ANOTHER></ANOTHER></CUSTOM>",customTag.toHtml()); } --- 368,372 ---- assertEquals("starting line position",1,customTag.tagData.getStartLine()); assertEquals("ending line position",1,customTag.tagData.getEndLine()); ! assertStringEquals("html","<CUSTOM><ANOTHER></ANOTHER></CUSTOM>",customTag.toHtml()); } *************** *** 396,400 **** "<CUSTOM><ANOTHER>something</ANOTHER></CUSTOM>", customTag.toHtml() ! 
); customTag = (CustomTag)node[1]; assertStringEquals( --- 396,400 ---- "<CUSTOM><ANOTHER>something</ANOTHER></CUSTOM>", customTag.toHtml() ! ); customTag = (CustomTag)node[1]; assertStringEquals( *************** *** 422,427 **** assertEquals("custom end tag begin loc",10,customTag.getEndTag().elementBegin()); assertEquals("custom end tag end loc",8,customTag.getEndTag().elementEnd()); ! } ! public void testCompositeTagWithSelfChildren() throws ParserException { createParser( --- 422,427 ---- assertEquals("custom end tag begin loc",10,customTag.getEndTag().elementBegin()); assertEquals("custom end tag end loc",8,customTag.getEndTag().elementEnd()); ! } ! public void testCompositeTagWithSelfChildren() throws ParserException { createParser( *************** *** 433,441 **** parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(3); ! CustomTag customTag = (CustomTag)node[0]; assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertStringEquals( "first custom tag html", --- 433,441 ---- parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(3); ! CustomTag customTag = (CustomTag)node[0]; assertEquals("child count",0,customTag.getChildCount()); assertFalse("custom tag should not be xml end tag",customTag.isEmptyXmlTag()); ! assertStringEquals( "first custom tag html", *************** *** 456,460 **** ); } ! public void testParentConnections() throws ParserException { createParser( --- 456,460 ---- ); } ! public void testParentConnections() throws ParserException { createParser( *************** *** 466,472 **** parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(3); ! CustomTag customTag = (CustomTag)node[0]; ! assertStringEquals( "first custom tag html", --- 466,472 ---- parser.addScanner(new AnotherScanner()); parseAndAssertNodeCount(3); ! CustomTag customTag = (CustomTag)node[0]; ! 
assertStringEquals( "first custom tag html", *************** *** 478,482 **** customTag.getParent() ); ! customTag = (CustomTag)node[1]; assertStringEquals( --- 478,482 ---- customTag.getParent() ); ! customTag = (CustomTag)node[1]; assertStringEquals( *************** *** 489,493 **** customTag.getParent() ); ! Node firstChild = customTag.childAt(0); assertType("firstChild",StringNode.class,firstChild); --- 489,493 ---- customTag.getParent() ); ! Node firstChild = customTag.childAt(0); assertType("firstChild",StringNode.class,firstChild); *************** *** 495,499 **** assertNotNull("first child parent should not be null",parent); assertSame("parent and custom tag should be the same",customTag,parent); ! EndTag endTag = (EndTag)node[2]; assertStringEquals( --- 495,499 ---- assertNotNull("first child parent should not be null",parent); assertSame("parent and custom tag should be the same",customTag,parent); ! EndTag endTag = (EndTag)node[2]; assertStringEquals( *************** *** 508,515 **** } ! public void testUrlBeingProvidedToCreateTag() throws ParserException { createParser("<Custom/>","http://www.yahoo.com"); ! parser.addScanner(new CustomScanner() { public Tag createTag( --- 508,515 ---- } ! public void testUrlBeingProvidedToCreateTag() throws ParserException { createParser("<Custom/>","http://www.yahoo.com"); ! parser.addScanner(new CustomScanner() { public Tag createTag( *************** *** 524,528 **** }); parseAndAssertNodeCount(1); ! assertStringEquals("url","http://www.yahoo.com",url); } --- 524,528 ---- }); parseAndAssertNodeCount(1); ! assertStringEquals("url","http://www.yahoo.com",url); } *************** *** 560,564 **** "<custom>\n" + "Hey\n" + ! "</custom>" ); parser.addScanner(new CustomScanner(false)); --- 560,564 ---- "<custom>\n" + "Hey\n" + ! "</custom>" ); parser.addScanner(new CustomScanner(false)); *************** *** 568,605 **** } } ! 
public static class CustomScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = { "CUSTOM" }; ! public CustomScanner() { ! this(true); } ! ! public CustomScanner(boolean selfChildrenAllowed) { ! super("", MATCH_NAME, new String[] {}, selfChildrenAllowed); } ! ! public String[] getID() { ! return MATCH_NAME; } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) { return new CustomTag(tagData, compositeTagData); } } ! public static class AnotherScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = { "ANOTHER" }; ! public AnotherScanner() { ! super("", MATCH_NAME, new String[] {"CUSTOM"}); } ! ! public AnotherScanner(boolean acceptCustomTagsButDontAcceptCustomEndTags) { ! super("", MATCH_NAME, new String[] {}, new String[] {"CUSTOM"}, true); } ! ! public String[] getID() { ! return MATCH_NAME; } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) { return new AnotherTag(tagData, compositeTagData); --- 568,605 ---- } } ! public static class CustomScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = { "CUSTOM" }; ! public CustomScanner() { ! this(true); } ! ! public CustomScanner(boolean selfChildrenAllowed) { ! super("", MATCH_NAME, new String[] {}, selfChildrenAllowed); } ! ! public String[] getID() { ! return MATCH_NAME; } ! public Tag createTag(TagData tagData, CompositeTagData compositeTagData) { return new CustomTag(tagData, compositeTagData); } } ! public static class AnotherScanner extends CompositeTagScanner { private static final String MATCH_NAME [] = { "ANOTHER" }; ! public AnotherScanner() { ! super("", MATCH_NAME, new String[] {"CUSTOM"}); } ! ! public AnotherScanner(boolean acceptCustomTagsButDontAcceptCustomEndTags) { ! super("", MATCH_NAME, new String[] {}, new String[] {"CUSTOM"}, true); } ! ! public String[] getID() { ! return MATCH_NAME; } ! 
public Tag createTag(TagData tagData, CompositeTagData compositeTagData) { return new AnotherTag(tagData, compositeTagData); *************** *** 608,612 **** return false; } ! } --- 608,612 ---- return false; } ! } *************** *** 618,622 **** } } ! public static class AnotherTag extends CompositeTag { public AnotherTag(TagData tagData, CompositeTagData compositeTagData) { --- 618,622 ---- } } ! public static class AnotherTag extends CompositeTag { public AnotherTag(TagData tagData, CompositeTagData compositeTagData) { Index: DivScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/DivScannerTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** DivScannerTest.java 8 Sep 2003 02:26:31 -0000 1.31 --- DivScannerTest.java 10 Sep 2003 03:38:24 -0000 1.32 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: FormScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/FormScannerTest.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** FormScannerTest.java 8 Sep 2003 02:26:31 -0000 1.31 --- FormScannerTest.java 10 Sep 2003 03:38:24 -0000 1.32 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 60,66 **** "<INPUT TYPE=\"submit\">\n"+ "</FORM>"; ! public static final String EXPECTED_FORM_HTML_FORMLINE="<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"; ! public static final String EXPECTED_FORM_HTML_REST_OF_FORM= "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n"+ --- 60,66 ---- "<INPUT TYPE=\"submit\">\n"+ "</FORM>"; ! public static final String EXPECTED_FORM_HTML_FORMLINE="<FORM ACTION=\"http://www.google.com/test/do_login.php\" NAME=\"login_form\" ONSUBMIT=\"return CheckData()\" METHOD=\""+FormTag.POST+"\">\r\n"; ! public static final String EXPECTED_FORM_HTML_REST_OF_FORM= "<TR><TD ALIGN=\"center\"> </TD></TR>\r\n"+ "<TR><TD ALIGN=\"center\"><FONT FACE=\"Arial, verdana\" SIZE=\"2\"><B>User Name</B></FONT></TD></TR>\r\n"+ *************** *** 77,86 **** "</FORM>"; public static final String EXPECTED_FORM_HTML = EXPECTED_FORM_HTML_FORMLINE+EXPECTED_FORM_HTML_REST_OF_FORM; ! public FormScannerTest(String name) { super(name); } ! ! 
public void testEvaluate() { String line1="form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; String line2="FORM method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; --- 77,86 ---- "</FORM>"; public static final String EXPECTED_FORM_HTML = EXPECTED_FORM_HTML_FORMLINE+EXPECTED_FORM_HTML_REST_OF_FORM; ! public FormScannerTest(String name) { super(name); } ! ! public void testEvaluate() { String line1="form method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; String line2="FORM method=\"post\" onsubmit=\"return implementsearch()\" name=frmsearch id=form"; *************** *** 91,95 **** assertTrue("Line 3",formScanner.evaluate(line3,null)); } ! public void assertTypeNameSize(String description,String type,String name,String size,InputTag inputTag) { assertEquals(description+" type",type,inputTag.getAttribute("TYPE")); --- 91,95 ---- assertTrue("Line 3",formScanner.evaluate(line3,null)); } ! public void assertTypeNameSize(String description,String type,String name,String size,InputTag inputTag) { assertEquals(description+" type",type,inputTag.getAttribute("TYPE")); *************** *** 101,105 **** assertEquals(description+" name",name,inputTag.getAttribute("NAME")); assertEquals(description+" value",value,inputTag.getAttribute("VALUE")); ! } public void testScan() throws ParserException { createParser(FORM_HTML,"http://www.google.com/test/index.html"); --- 101,105 ---- assertEquals(description+" name",name,inputTag.getAttribute("NAME")); assertEquals(description+" value",value,inputTag.getAttribute("VALUE")); ! } public void testScan() throws ParserException { createParser(FORM_HTML,"http://www.google.com/test/index.html"); *************** *** 110,114 **** assertStringEquals("Method",FormTag.POST,formTag.getFormMethod()); assertStringEquals("Location","http://www.google.com/test/do_login.php",formTag.getFormLocation()); ! 
assertStringEquals("Name","login_form",formTag.getFormName()); InputTag nameTag = formTag.getInputTag("name"); InputTag passwdTag = formTag.getInputTag("passwd"); --- 110,114 ---- assertStringEquals("Method",FormTag.POST,formTag.getFormMethod()); assertStringEquals("Location","http://www.google.com/test/do_login.php",formTag.getFormLocation()); ! assertStringEquals("Name","login_form",formTag.getFormName()); InputTag nameTag = formTag.getInputTag("name"); InputTag passwdTag = formTag.getInputTag("passwd"); *************** *** 119,135 **** assertNotNull("Input Submit Tag should not be null",submitTag); assertNull("Input dummy tag should be null",dummyTag); ! assertTypeNameSize("Input Name Tag","text","name","20",nameTag); assertTypeNameSize("Input Password Tag","password","passwd","20",passwdTag); assertTypeNameValue("Input Submit Tag","submit","submit","Login",submitTag); ! TextareaTag textAreaTag = formTag.getTextAreaTag("Description"); assertNotNull("Text Area Tag should have been found",textAreaTag); assertEquals("Text Area Tag Contents","Contents of TextArea",textAreaTag.getValue()); assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! assertStringEquals("toHTML",EXPECTED_FORM_HTML,formTag.toHtml()); } ! public void testScanFormWithNoEnding() throws Exception { createParser( --- 119,135 ---- assertNotNull("Input Submit Tag should not be null",submitTag); assertNull("Input dummy tag should be null",dummyTag); ! assertTypeNameSize("Input Name Tag","text","name","20",nameTag); assertTypeNameSize("Input Password Tag","password","passwd","20",passwdTag); assertTypeNameValue("Input Submit Tag","submit","submit","Login",submitTag); ! TextareaTag textAreaTag = formTag.getTextAreaTag("Description"); assertNotNull("Text Area Tag should have been found",textAreaTag); assertEquals("Text Area Tag Contents","Contents of TextArea",textAreaTag.getValue()); assertNull("Should have been null",formTag.getTextAreaTag("junk")); ! 
assertStringEquals("toHTML",EXPECTED_FORM_HTML,formTag.toHtml()); } ! public void testScanFormWithNoEnding() throws Exception { createParser( *************** *** 146,155 **** "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ "</TABLE>","http://www.google.com/test/index.html"); ! parser.addScanner(new FormScanner("",parser)); ! parseAndAssertNodeCount(2); } ! /** * Bug reported by Pavan Podila - forms with links are not being parsed * Sample html is from google --- 146,155 ---- "<INPUT TYPE=\"hidden\" NAME=\"password\" SIZE=\"20\">\n"+ "</TABLE>","http://www.google.com/test/index.html"); ! parser.addScanner(new FormScanner("",parser)); ! parseAndAssertNodeCount(2); } ! /** * Bug reported by Pavan Podila - forms with links are not being parsed * Sample html is from google *************** *** 165,170 **** " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! parser.addScanner(new FormScanner("",parser)); parser.addScanner(new LinkScanner()); --- 165,170 ---- " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! parser.addScanner(new FormScanner("",parser)); parser.addScanner(new LinkScanner()); *************** *** 177,181 **** Node formNode = e.nextNode(); if (formNode instanceof LinkTag) { ! linkTag[i++] = (LinkTag)formNode; } } --- 177,181 ---- Node formNode = e.nextNode(); if (formNode instanceof LinkTag) { ! linkTag[i++] = (LinkTag)formNode; } } *************** *** 185,189 **** assertEquals("Third Link Tag Text","Language Tools",linkTag[2].getLinkText()); } ! /** * Bug 652674 - forms with comments are not being parsed */ --- 185,189 ---- assertEquals("Third Link Tag Text","Language Tools",linkTag[2].getLinkText()); } ! 
/** * Bug 652674 - forms with comments are not being parsed */ *************** *** 199,204 **** " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! parser.addScanner(new FormScanner("",parser)); parseAndAssertNodeCount(1); --- 199,204 ---- " <a href=/preferences?hl=en>Preferences</a><br> • <a href=/"+ "language_tools?hl=en>Language Tools</a></font></td></tr></table></form>" ! ); ! parser.addScanner(new FormScanner("",parser)); parseAndAssertNodeCount(1); *************** *** 210,225 **** Node formNode = (Node)e.nextNode(); if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; } } assertEquals("Remark Node Count",1,i); assertEquals("First Remark Node"," Hello World ",remarkNode[0].toPlainTextString()); ! } ! /** * Bug 652674 - forms with comments are not being parsed */ public void testScanFormWithComments2() throws ParserException { createParser( ! "<FORM id=\"id\" name=\"name\" action=\"http://some.site/aPage.asp?id=97\" method=\"post\">\n"+ " <!--\n"+ " Just a Comment\n"+ --- 210,225 ---- Node formNode = (Node)e.nextNode(); if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; } } assertEquals("Remark Node Count",1,i); assertEquals("First Remark Node"," Hello World ",remarkNode[0].toPlainTextString()); ! } ! /** * Bug 652674 - forms with comments are not being parsed */ public void testScanFormWithComments2() throws ParserException { createParser( ! "<FORM id=\"id\" name=\"name\" action=\"http://some.site/aPage.asp?id=97\" method=\"post\">\n"+ " <!--\n"+ " Just a Comment\n"+ *************** *** 235,244 **** Node formNode = (Node)e.nextNode(); if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; } } assertEquals("Remark Node Count",1,i); ! } ! 
/** * Bug 656870 - a form tag with a previously open link causes infinite loop --- 235,244 ---- Node formNode = (Node)e.nextNode(); if (formNode instanceof RemarkNode) { ! remarkNode[i++] = (RemarkNode)formNode; } } assertEquals("Remark Node Count",1,i); ! } ! /** * Bug 656870 - a form tag with a previously open link causes infinite loop *************** *** 272,276 **** /** ! * Bug 713907 reported by Dhaval Udani, erroneous * parsing of form tag (even when form scanner is not * registered) --- 272,276 ---- /** ! * Bug 713907 reported by Dhaval Udani, erroneous * parsing of form tag (even when form scanner is not * registered) *************** *** 296,300 **** ); } ! /** * See bug #745566 StackOverflowError on select with too many unclosed options. --- 296,300 ---- ); } ! /** * See bug #745566 StackOverflowError on select with too many unclosed options. *************** *** 306,310 **** int i; Node[] nodes; ! parser = new Parser(url); parser.registerScanners (); --- 306,310 ---- int i; Node[] nodes; ! parser = new Parser(url); parser.registerScanners (); Index: FrameScannerTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/scannersTests/FrameScannerTest.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** FrameScannerTest.java 8 Sep 2003 02:26:31 -0000 1.25 --- FrameScannerTest.java 10 Sep 2003 03:38:24 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 39,65 **** super(name); } ! public void testScan() throws ParserException { createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ "</frameset>","http://www.google.com/test/index.html"); ! parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(4); ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); assertTrue("Node 2 should be Frame Tag",node[2] instanceof FrameTag); FrameTag frameTag1 = (FrameTag)node[1]; ! FrameTag frameTag2 = (FrameTag)node[2]; assertEquals("Frame 1 Locn","http://www.google.com/test/demo_bc_top.html",frameTag1.getFrameLocation()); assertEquals("Frame 1 Name","topFrame",frameTag1.getFrameName()); ! assertEquals("Frame 2 Locn","http://www.kizna.com/web_e/",frameTag2.getFrameLocation()); assertEquals("Frame 2 Name","mainFrame",frameTag2.getFrameName()); assertEquals("Frame 1 Scrolling","NO",frameTag1.getAttribute("scrolling")); ! 
assertEquals("Frame 1 Border","NO",frameTag1.getAttribute("frameborder")); } } --- 39,65 ---- super(name); } ! public void testScan() throws ParserException { createParser( ! "<frameset rows=\"115,*\" frameborder=\"NO\" border=\"0\" framespacing=\"0\">\n"+ "<frame name=\"topFrame\" noresize src=\"demo_bc_top.html\" scrolling=\"NO\" frameborder=\"NO\">\n"+ "<frame name=\"mainFrame\" src=\"http://www.kizna.com/web_e/\" scrolling=\"AUTO\">\n"+ "</frameset>","http://www.google.com/test/index.html"); ! parser.addScanner(new FrameScanner("")); ! parseAndAssertNodeCount(4); ! assertTrue("Node 1 should be Frame Tag",node[1] instanceof FrameTag); assertTrue("Node 2 should be Frame Tag",node[2] i... [truncated message content] |
From: <der...@us...> - 2003-09-10 03:54:16
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/tabby In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/lexerapplications/tabby Added Files: Tabby.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
--- NEW FILE: Tabby.java --- // HTMLParser Library $Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2003 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexerapplications/tabby/Tabby.java,v $ // $Author: derrickoswald $ // $Date: 2003/09/10 03:38:26 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.lexerapplications.tabby; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.htmlparser.lexer.Cursor; import org.htmlparser.lexer.Page; /** * Replace tabs with spaces. * Convert tabs to the correct number of spaces according to a tabstop, * change DOS \r\n line endings to Unix \n form, and remove trailing whitespace */ public class Tabby { /** * The default tab stop spacing. */ private static final int DEFAULT_TABSTOP = 4; /** * The file filter to apply. */ protected Filter mFilter; /** * The replacement tab stop size. */ protected int mTabsize; /** * Creates a new instance of Tabby with no file filter and a tab stop of 4. 
*/ public Tabby () { mFilter = null; mTabsize = DEFAULT_TABSTOP; } /** * Creates a new instance of Tabby using the given regular expression and * a tab stop of 4. * @param filter The regular expression to apply to the files searched. */ public Tabby (final String filter) { this (); mFilter = new Filter (filter); } /** Creates a new instance of Tabby. * @param filter The regular expression to apply to the files searched. * @param tabsize The tab stop setting. * @exception IllegalArgumentException If tabsize is not a positive number. */ public Tabby (final String filter, final int tabsize) throws IllegalArgumentException { this (filter); if (0 >= tabsize) throw new IllegalArgumentException ("tab size cannot be negative"); mTabsize = tabsize; } /** * Process the file or directory. * @param file The file to process. */ protected void process (final File file) { File[] files; File f; if (file.isDirectory ()) { files = file.listFiles (mFilter); for (int i = 0; i < files.length; i++) process (files[i]); } else edit (file); } /** * Process the file or directory. * @param file The file to edit. 
*/ protected void edit (final File file) { FileInputStream in; Page page; Cursor cursor; int position; int expected; boolean modified; char ch; int last; StringBuffer buffer; FileOutputStream out; try { in = new FileInputStream (file); buffer = new StringBuffer (in.available ()); try { page = new Page (in, null); cursor = new Cursor (page, 0); position = 0; modified = false; expected = 0; last = -1; while (0 != (ch = page.getCharacter (cursor))) { if (++expected != cursor.getPosition ()) { modified = true; expected = cursor.getPosition (); } if ('\t' == ch) { do { buffer.append (' '); position++; } while (0 != (position % mTabsize)); modified = true; } else if ('\n' == ch) { // check for whitespace on the end of the line if (last + 1 != position) { // remove trailing whitespace last = buffer.length () - (position - last - 1); buffer.setLength (last); modified = true; } buffer.append (ch); position = 0; last = -1; } else { buffer.append (ch); if (!Character.isWhitespace (ch)) last = position; position++; } } } finally { in.close (); } if (modified) { System.out.println (file.getAbsolutePath ()); out = new FileOutputStream (file); out.write (buffer.toString ().getBytes (Page.DEFAULT_CHARSET)); out.close (); } } catch (Exception e) { System.out.println (e); } } /** * Implement a file filter. */ class Filter implements FileFilter { /** * The compiled expression. */ protected Pattern mExpression; /** * Create a file filter from the regular expression. * @param expression The <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html#sum">regular expression</a>. * A useful regular expression is ".*\.java" which accepts all * .java files. * @exception IllegalArgumentException If the expression is * <code>null</code>. * @exception PatternSyntaxException If the expression is not a valid * regular expression. 
*/ public Filter (final String expression) throws PatternSyntaxException { if (null == expression) throw new IllegalArgumentException ( "filter expression cannot be null"); mExpression = Pattern.compile (expression); } // // FileFilter interface // /** * Tests whether or not the file should be included in a pathname list. * @param pathname The abstract pathname to be tested. * @return <code>true</code> if and only if <code>pathname</code> * should be included. */ public boolean accept (final File pathname) { Matcher matcher; boolean ret; // match directories if (pathname.isDirectory ()) ret = true; else { matcher = mExpression.matcher (pathname.getAbsolutePath ()); ret = matcher.matches (); } return (ret); } } /** * Run Tabby on a file or directory. * @param args The command line arguments. * <PRE> * args[0] The file or directory to work on. * args[1] Optional, the regular expression to use as a file filter * args[2] Optional, the tab stop setting (integer). * </PRE> */ public static void main (final String[] args) { Tabby tabby; File file; if (0 == args.length) System.out.println ( "usage: Tabby (<directory>|<file>)" + " [file-match regexp] [tabsize]"); else { if (2 < args.length) tabby = new Tabby (args[1], Integer.parseInt (args[2])); else if (1 < args.length) tabby = new Tabby (args[1]); else tabby = new Tabby (); file = new File (args[0]); tabby.process (file); } } } /* * Revision Control Modification History * * $Log: Tabby.java,v $ * Revision 1.1 2003/09/10 03:38:26 derrickoswald * Add style checking target to ant build script: * ant checkstyle * It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. * The rules are in the file htmlparser_checks.xml in the src directory. * * Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation * on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. 
* * There are a few issues with the style checker that need to be resolved before it should be taken too seriously. * For example: * It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). * It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment * that's more than 80 characters long. * It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. * It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } * * But it points out some really interesting things, even if you don't agree with the style guidelines, * so it's worth a look. * * */ --- NEW FILE: package.html --- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <HTML> <HEAD> <TITLE>Tabby</TITLE> </HEAD> <BODY> The Tabby program is a demonstration of how to use the underlying Lexer classes to perform file I/O. The results could also be achieved with a normal Reader object but then there wouldn't be anything interesting about this. <p> The task is to replace tabs with spaces and preserve any indentation that was a combination of tabs and spaces. This means honouring the 'tab stops' that would be used by an editor and not just a global search and replace. <p> The mechanism used is to keep track of the character offset from the beginning of a line and when a tab is encountered add spaces till the next tab stop. <p> At the same time any "\r\n" combinations that are in the file are converted to simple "\n" characters as found on Unix/Linux systems, and trailing whitespace at the ends of lines is removed. <p> The trick is to only write the file if something in the file required changes, so a boolean variable is kept current with a <code>true</code> value indicating the file needs to be modified. </BODY> </HTML> |
From: <der...@us...> - 2003-09-10 03:54:15
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/visitors Modified Files: HtmlPage.java LinkFindingVisitor.java NodeVisitor.java ObjectFindingVisitor.java StringFindingVisitor.java TagFindingVisitor.java TextExtractingVisitor.java UrlModifyingVisitor.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
Index: HtmlPage.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/HtmlPage.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** HtmlPage.java 8 Sep 2003 02:26:33 -0000 1.32 --- HtmlPage.java 10 Sep 2003 03:38:25 -0000 1.33 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 45,49 **** private NodeList tables; private boolean bodyTagBegin; ! public HtmlPage(Parser parser) { super(false); --- 45,49 ---- private NodeList tables; private boolean bodyTagBegin; ! public HtmlPage(Parser parser) { super(false); *************** *** 54,58 **** bodyTagBegin = false; } ! public String getTitle() { return title; --- 54,58 ---- bodyTagBegin = false; } ! public String getTitle() { return title; *************** *** 65,69 **** public void visitTag(Tag tag) { addTagToBodyIfApplicable(tag); ! if (isTable(tag)) { tables.add(tag); --- 65,69 ---- public void visitTag(Tag tag) { addTagToBodyIfApplicable(tag); ! if (isTable(tag)) { tables.add(tag); *************** *** 85,91 **** public void visitEndTag(EndTag endTag) { ! if (isBodyTag(endTag)) bodyTagBegin = false; ! addTagToBodyIfApplicable(endTag); } --- 85,91 ---- public void visitEndTag(EndTag endTag) { ! if (isBodyTag(endTag)) bodyTagBegin = false; ! addTagToBodyIfApplicable(endTag); } *************** *** 97,109 **** addTagToBodyIfApplicable(stringNode); } ! private boolean isBodyTag(Tag tag) { return tag.getTagName().equals("BODY"); } ! public NodeList getBody() { return nodesInBody; } ! 
public TableTag [] getTables() { TableTag [] tableArr = new TableTag[tables.size()]; --- 97,109 ---- addTagToBodyIfApplicable(stringNode); } ! private boolean isBodyTag(Tag tag) { return tag.getTagName().equals("BODY"); } ! public NodeList getBody() { return nodesInBody; } ! public TableTag [] getTables() { TableTag [] tableArr = new TableTag[tables.size()]; Index: LinkFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/LinkFindingVisitor.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** LinkFindingVisitor.java 8 Sep 2003 02:26:33 -0000 1.27 --- LinkFindingVisitor.java 10 Sep 2003 03:38:25 -0000 1.28 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 35,39 **** private boolean linkTagFound = false; private int count = 0; ! public LinkFindingVisitor(String linkTextToFind) { this.linkTextToFind = linkTextToFind.toUpperCase(); --- 35,39 ---- private boolean linkTagFound = false; private int count = 0; ! public LinkFindingVisitor(String linkTextToFind) { this.linkTextToFind = linkTextToFind.toUpperCase(); *************** *** 47,55 **** } } ! public boolean linkTextFound() { return linkTagFound; } ! public int getCount() { return count; --- 47,55 ---- } } ! public boolean linkTextFound() { return linkTagFound; } ! 
public int getCount() { return count; Index: NodeVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** NodeVisitor.java 8 Sep 2003 02:26:33 -0000 1.27 --- NodeVisitor.java 10 Sep 2003 03:38:25 -0000 1.28 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,86 **** private boolean recurseChildren; private boolean recurseSelf; ! public NodeVisitor() { ! this(true); } ! public NodeVisitor(boolean recurseChildren) { this.recurseChildren = recurseChildren; ! this.recurseSelf = true; } ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { this.recurseChildren = recurseChildren; ! this.recurseSelf = recurseSelf; } public void visitTag(Tag tag) { ! } public void visitStringNode(StringNode stringNode) { } ! public void visitLinkTag(LinkTag linkTag) { } ! public void visitImageTag(ImageTag imageTag) { } ! public void visitEndTag(EndTag endTag) { ! } ! public void visitTitleTag(TitleTag titleTag) { ! } public void visitRemarkNode(RemarkNode remarkNode) { ! } ! public boolean shouldRecurseChildren() { return recurseChildren; } ! public boolean shouldRecurseSelf() { return recurseSelf; --- 40,86 ---- private boolean recurseChildren; private boolean recurseSelf; ! public NodeVisitor() { ! this(true); } ! public NodeVisitor(boolean recurseChildren) { this.recurseChildren = recurseChildren; ! this.recurseSelf = true; } ! public NodeVisitor(boolean recurseChildren,boolean recurseSelf) { this.recurseChildren = recurseChildren; ! 
this.recurseSelf = recurseSelf; } public void visitTag(Tag tag) { ! } public void visitStringNode(StringNode stringNode) { } ! public void visitLinkTag(LinkTag linkTag) { } ! public void visitImageTag(ImageTag imageTag) { } ! public void visitEndTag(EndTag endTag) { ! } ! public void visitTitleTag(TitleTag titleTag) { ! } public void visitRemarkNode(RemarkNode remarkNode) { ! } ! public boolean shouldRecurseChildren() { return recurseChildren; } ! public boolean shouldRecurseSelf() { return recurseSelf; *************** *** 89,93 **** /** * Override this method if you wish to do special ! * processing upon completion of parsing */ public void finishedParsing() { --- 89,93 ---- /** * Override this method if you wish to do special ! * processing upon completion of parsing */ public void finishedParsing() { Index: ObjectFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/ObjectFindingVisitor.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** ObjectFindingVisitor.java 8 Sep 2003 02:26:33 -0000 1.32 --- ObjectFindingVisitor.java 10 Sep 2003 03:38:25 -0000 1.33 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 39,47 **** private int count = 0; private NodeList tags; ! public ObjectFindingVisitor(Class classTypeToFind) { this(classTypeToFind,false); } ! public ObjectFindingVisitor(Class classTypeToFind,boolean recurse) { super(recurse); --- 39,47 ---- private int count = 0; private NodeList tags; ! 
public ObjectFindingVisitor(Class classTypeToFind) { this(classTypeToFind,false); } ! public ObjectFindingVisitor(Class classTypeToFind,boolean recurse) { super(recurse); *************** *** 49,53 **** this.tags = new NodeList(); } ! public int getCount() { return count; --- 49,53 ---- this.tags = new NodeList(); } ! public int getCount() { return count; Index: StringFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/StringFindingVisitor.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** StringFindingVisitor.java 8 Sep 2003 02:26:33 -0000 1.32 --- StringFindingVisitor.java 10 Sep 2003 03:38:25 -0000 1.33 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 38,42 **** private int foundCount; private boolean multipleSearchesWithinStrings; ! public StringFindingVisitor(String stringToFind) { this.stringToFind = stringToFind.toUpperCase(); --- 38,42 ---- private int foundCount; private boolean multipleSearchesWithinStrings; ! public StringFindingVisitor(String stringToFind) { this.stringToFind = stringToFind.toUpperCase(); *************** *** 44,55 **** multipleSearchesWithinStrings = false; } ! public void doMultipleSearchesWithinStrings() { multipleSearchesWithinStrings = true; } ! public void visitStringNode(StringNode stringNode) { String stringToBeSearched = stringNode.getText().toUpperCase(); ! 
if (!multipleSearchesWithinStrings && stringToBeSearched.indexOf(stringToFind) != -1) { stringFound = true; --- 44,55 ---- multipleSearchesWithinStrings = false; } ! public void doMultipleSearchesWithinStrings() { multipleSearchesWithinStrings = true; } ! public void visitStringNode(StringNode stringNode) { String stringToBeSearched = stringNode.getText().toUpperCase(); ! if (!multipleSearchesWithinStrings && stringToBeSearched.indexOf(stringToFind) != -1) { stringFound = true; *************** *** 60,72 **** index = stringToBeSearched.indexOf(stringToFind, index+1); if (index!=-1) ! foundCount++; } while (index != -1); } } ! public boolean stringWasFound() { return stringFound; ! } ! public int stringFoundCount() { return foundCount; --- 60,72 ---- index = stringToBeSearched.indexOf(stringToFind, index+1); if (index!=-1) ! foundCount++; } while (index != -1); } } ! public boolean stringWasFound() { return stringFound; ! } ! public int stringFoundCount() { return foundCount; Index: TagFindingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TagFindingVisitor.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** TagFindingVisitor.java 8 Sep 2003 02:26:33 -0000 1.33 --- TagFindingVisitor.java 10 Sep 2003 03:38:25 -0000 1.34 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http:// www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http:// www.industriallogic.com *************** *** 43,47 **** private NodeList [] endTags; private boolean endTagCheck; ! public TagFindingVisitor(String [] tagsToBeFound) { this(tagsToBeFound,false); --- 43,47 ---- private NodeList [] endTags; private boolean endTagCheck; ! 
public TagFindingVisitor(String [] tagsToBeFound) { this(tagsToBeFound,false); *************** *** 61,67 **** } this.count = new int[tagsToBeFound.length]; ! this.endTagCheck = endTagCheck; ! } ! public int getTagCount(int index) { return count[index]; --- 61,67 ---- } this.count = new int[tagsToBeFound.length]; ! this.endTagCheck = endTagCheck; ! } ! public int getTagCount(int index) { return count[index]; *************** *** 88,95 **** } } ! public int getEndTagCount(int index) { return endTagCount[index]; } ! } --- 88,95 ---- } } ! public int getEndTagCount(int index) { return endTagCount[index]; } ! } Index: TextExtractingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/TextExtractingVisitor.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** TextExtractingVisitor.java 8 Sep 2003 02:26:33 -0000 1.31 --- TextExtractingVisitor.java 10 Sep 2003 03:38:25 -0000 1.32 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 49,53 **** private StringBuffer textAccumulator; private boolean preTagBeingProcessed; ! public TextExtractingVisitor() { textAccumulator = new StringBuffer(); --- 49,53 ---- private StringBuffer textAccumulator; private boolean preTagBeingProcessed; ! public TextExtractingVisitor() { textAccumulator = new StringBuffer(); *************** *** 62,66 **** String text = stringNode.getText(); if (!preTagBeingProcessed) { ! text = Translate.decode(text); text = replaceNonBreakingSpaceWithOrdinarySpace(text); } --- 62,66 ---- String text = stringNode.getText(); if (!preTagBeingProcessed) { ! 
text = Translate.decode(text); text = replaceNonBreakingSpaceWithOrdinarySpace(text); } *************** *** 77,86 **** public void visitEndTag(EndTag endTag) { ! if (isPreTag(endTag)) preTagBeingProcessed = false; } public void visitTag(Tag tag) { ! if (isPreTag(tag)) preTagBeingProcessed = true; } --- 77,86 ---- public void visitEndTag(EndTag endTag) { ! if (isPreTag(endTag)) preTagBeingProcessed = false; } public void visitTag(Tag tag) { ! if (isPreTag(tag)) preTagBeingProcessed = true; } Index: UrlModifyingVisitor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/UrlModifyingVisitor.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** UrlModifyingVisitor.java 8 Sep 2003 02:26:33 -0000 1.30 --- UrlModifyingVisitor.java 10 Sep 2003 03:38:25 -0000 1.31 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,50 **** private StringBuffer modifiedResult; private Parser parser; ! public UrlModifyingVisitor(Parser parser, String linkPrefix) { super(true,false); this.parser = parser; ! LinkScanner linkScanner = new LinkScanner(); parser.addScanner(linkScanner); parser.addScanner( --- 42,50 ---- private StringBuffer modifiedResult; private Parser parser; ! public UrlModifyingVisitor(Parser parser, String linkPrefix) { super(true,false); this.parser = parser; ! LinkScanner linkScanner = new LinkScanner(); parser.addScanner(linkScanner); parser.addScanner( *************** *** 53,60 **** ) ); ! this.linkPrefix =linkPrefix; modifiedResult = new StringBuffer(); } ! 
public void visitLinkTag(LinkTag linkTag) { linkTag.setLink(linkPrefix + linkTag.getLink()); --- 53,60 ---- ) ); ! this.linkPrefix =linkPrefix; modifiedResult = new StringBuffer(); } ! public void visitLinkTag(LinkTag linkTag) { linkTag.setLink(linkPrefix + linkTag.getLink()); *************** *** 65,69 **** modifiedResult.append(imageTag.toHtml()); } ! public void visitEndTag(EndTag endTag) { modifiedResult.append(endTag.toHtml()); --- 65,69 ---- modifiedResult.append(imageTag.toHtml()); } ! public void visitEndTag(EndTag endTag) { modifiedResult.append(endTag.toHtml()); *************** *** 77,83 **** modifiedResult.append(tag.toHtml()); } ! public String getModifiedResult() { ! return modifiedResult.toString(); } } --- 77,83 ---- modifiedResult.append(tag.toHtml()); } ! public String getModifiedResult() { ! return modifiedResult.toString(); } } Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/package.html,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** package.html 8 Sep 2003 02:26:33 -0000 1.13 --- package.html 10 Sep 2003 03:38:25 -0000 1.14 *************** *** 18,22 **** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software --- 18,22 ---- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software *************** *** 25,37 **** For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 
2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> --- 25,37 ---- For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/util Modified Files: ChainedException.java CommandLine.java DefaultParserFeedback.java FeedbackManager.java Generate.java IteratorImpl.java LinkProcessor.java NodeIterator.java NodeList.java ParserException.java ParserFeedback.java ParserUtils.java PeekingIterator.java SimpleNodeIterator.java Translate.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
Index: ChainedException.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ChainedException.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** ChainedException.java 8 Sep 2003 02:26:32 -0000 1.38 --- ChainedException.java 10 Sep 2003 03:38:25 -0000 1.39 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,31 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Claude Duguay // --- 18,31 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Claude Duguay // *************** *** 78,87 **** super(message); } ! public ChainedException(Throwable throwable) { this.throwable = throwable; } ! public ChainedException(String message, Throwable throwable) { --- 78,87 ---- super(message); } ! public ChainedException(Throwable throwable) { this.throwable = throwable; } ! 
public ChainedException(String message, Throwable throwable) { *************** *** 89,93 **** this.throwable = throwable; } ! public String[] getMessageChain() { --- 89,93 ---- this.throwable = throwable; } ! public String[] getMessageChain() { *************** *** 97,101 **** return chain; } ! public Vector getMessageList() { --- 97,101 ---- return chain; } ! public Vector getMessageList() { *************** *** 122,136 **** return list; } ! public Throwable getThrowable() { return throwable; } ! public void printStackTrace() { printStackTrace(System.err); } ! public void printStackTrace(PrintStream out) { --- 122,136 ---- return list; } ! public Throwable getThrowable() { return throwable; } ! public void printStackTrace() { printStackTrace(System.err); } ! public void printStackTrace(PrintStream out) { Index: CommandLine.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/CommandLine.java,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -d -r1.36 -r1.37 *** CommandLine.java 8 Sep 2003 02:26:33 -0000 1.36 --- CommandLine.java 10 Sep 2003 03:38:25 -0000 1.37 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 74,83 **** parse(args); } ! public CommandLine(String[] args) { parse(args); } ! protected void parse(String[] args) { --- 74,83 ---- parse(args); } ! public CommandLine(String[] args) { parse(args); } ! protected void parse(String[] args) { *************** *** 90,94 **** nextArg = args[i + 1]; } ! if (thisArg.startsWith("-")) { --- 90,94 ---- nextArg = args[i + 1]; } ! if (thisArg.startsWith("-")) { *************** *** 139,173 **** } } ! public void addCommand(char command) { commands.add(new Character(command)); } ! public boolean hasValue(String key) { return values.containsKey(key); } ! public String getValue(String key) { return (String)values.get(key); } ! public boolean getFlag(String key) { return flags.contains(key); } ! public int getNameCount() { return names.size(); } ! public String getName(int index) { return (String)names.get(index); } ! public static void main(String[] args) { --- 139,173 ---- } } ! public void addCommand(char command) { commands.add(new Character(command)); } ! public boolean hasValue(String key) { return values.containsKey(key); } ! public String getValue(String key) { return (String)values.get(key); } ! public boolean getFlag(String key) { return flags.contains(key); } ! public int getNameCount() { return names.size(); } ! public String getName(int index) { return (String)names.get(index); } ! 
public static void main(String[] args) { Index: DefaultParserFeedback.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/DefaultParserFeedback.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** DefaultParserFeedback.java 8 Sep 2003 02:26:33 -0000 1.25 --- DefaultParserFeedback.java 10 Sep 2003 03:38:25 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 84,88 **** public DefaultParserFeedback (int mode) { ! if (mode<QUIET||mode>DEBUG) throw new IllegalArgumentException ( "illegal mode (" --- 84,88 ---- public DefaultParserFeedback (int mode) { ! if (mode<QUIET||mode>DEBUG) throw new IllegalArgumentException ( "illegal mode (" *************** *** 109,113 **** System.out.println ("INFO: " + message); } ! 
/** * Print an warning message. --- 109,113 ---- System.out.println ("INFO: " + message); } ! /** * Print an warning message. *************** *** 119,123 **** System.out.println ("WARNING: " + message); } ! /** * Print an error message. --- 119,123 ---- System.out.println ("WARNING: " + message); } ! /** * Print an error message. Index: FeedbackManager.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/FeedbackManager.java,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** FeedbackManager.java 8 Sep 2003 02:26:33 -0000 1.38 --- FeedbackManager.java 10 Sep 2003 03:38:25 -0000 1.39 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: Generate.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Generate.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** Generate.java 8 Sep 2003 02:26:33 -0000 1.40 --- Generate.java 10 Sep 2003 03:38:25 -0000 1.41 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,31 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // --- 18,31 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // *************** *** 62,66 **** */ protected Parser parser; ! /** * The system specific line separator string. --- 62,66 ---- */ protected Parser parser; ! /** * The system specific line separator string. 
*************** *** 90,94 **** * @param string The raw string. * @return The string with character references fixed. ! */ public String translate (String string) { --- 90,94 ---- * @param string The raw string. * @return The string with character references fixed. ! */ public String translate (String string) { *************** *** 96,102 **** int amp; StringBuffer ret; ! ret = new StringBuffer (4096); ! index = 0; while ((index < string.length ()) && (-1 != (amp = string.indexOf ('&', index)))) --- 96,102 ---- int amp; StringBuffer ret; ! ret = new StringBuffer (4096); ! index = 0; while ((index < string.length ()) && (-1 != (amp = string.indexOf ('&', index)))) *************** *** 147,157 **** } ret.append (string.substring (index)); ! return (ret.toString ()); } ! /** * Pull out text elements from the HTML. ! */ public void parse () throws --- 147,157 ---- } ret.append (string.substring (index)); ! return (ret.toString ()); } ! /** * Pull out text elements from the HTML. ! */ public void parse () throws *************** *** 166,170 **** { node = e.nextNode (); ! if (node instanceof StringNode) { --- 166,170 ---- { node = e.nextNode (); ! if (node instanceof StringNode) { *************** *** 204,208 **** } } ! String text = translate (buffer.toString ()); sgml (text); --- 204,208 ---- } } ! String text = translate (buffer.toString ()); sgml (text); *************** *** 232,236 **** return (ret); } ! /** * Rewrite the comment string. --- 232,236 ---- return (ret); } ! /** * Rewrite the comment string. *************** *** 249,253 **** int spaces; StringBuffer ret; ! ret = new StringBuffer (string.length ()); --- 249,253 ---- int spaces; StringBuffer ret; ! ret = new StringBuffer (string.length ()); *************** *** 266,278 **** if (index < string.length ()) ret.append (string.substring (index)); ! return (ret.toString ()); } ! /** * Pretty up a comment string. * @param string The comment to operate on. * @return The beautiful comment string. ! 
*/ public String pretty (String string) { --- 266,278 ---- if (index < string.length ()) ret.append (string.substring (index)); ! return (ret.toString ()); } ! /** * Pretty up a comment string. * @param string The comment to operate on. * @return The beautiful comment string. ! */ public String pretty (String string) { *************** *** 280,284 **** int spaces; StringBuffer ret; ! ret = new StringBuffer (string.length ()); --- 280,284 ---- int spaces; StringBuffer ret; ! ret = new StringBuffer (string.length ()); *************** *** 296,300 **** if (index < string.length ()) ret.append (" // " + string.substring (index)); ! return (ret.toString ()); } --- 296,300 ---- if (index < string.length ()) ret.append (" // " + string.substring (index)); ! return (ret.toString ()); } *************** *** 306,319 **** * @param length The size to pad to. * @return The padded string. ! */ public String pad (String string, char character, int length) { StringBuffer ret; ! ret = new StringBuffer (length); ret.append (string); while (length > ret.length ()) ret.insert (0, character); ! return (ret.toString ()); } --- 306,319 ---- * @param length The size to pad to. * @return The padded string. ! */ public String pad (String string, char character, int length) { StringBuffer ret; ! ret = new StringBuffer (length); ret.append (string); while (length > ret.length ()) ret.insert (0, character); ! return (ret.toString ()); } *************** *** 324,332 **** * @return The character represented by the numeric character reference. * ! */ public String unicode (String string) { int code; ! if (string.startsWith ("\"&#") && string.endsWith (";\"")) { --- 324,332 ---- * @return The character represented by the numeric character reference. * ! */ public String unicode (String string) { int code; ! if (string.startsWith ("\"&#") && string.endsWith (";\"")) { *************** *** 354,358 **** * with the comment. * @param string The contents of the sgml declaration. ! 
*/ public void extract (String string) { --- 354,358 ---- * with the comment. * @param string The contents of the sgml declaration. ! */ public void extract (String string) { *************** *** 412,416 **** int begin; int end; ! index = 0; while (-1 != (begin = string.indexOf ("<", index))) --- 412,416 ---- int begin; int end; ! index = 0; while (-1 != (begin = string.indexOf ("<", index))) Index: IteratorImpl.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/IteratorImpl.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** IteratorImpl.java 8 Sep 2003 02:26:33 -0000 1.26 --- IteratorImpl.java 10 Sep 2003 03:38:25 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 72,79 **** throw ex; } ! 
return (ret); } ! /** * Check if more nodes are available. --- 72,79 ---- throw ex; } ! return (ret); } ! /** * Check if more nodes are available. *************** *** 89,96 **** else ret = !(null == peek ()); ! return (ret); } ! /** * Get the next node. --- 89,96 ---- else ret = !(null == peek ()); ! return (ret); } ! /** * Get the next node. *************** *** 105,109 **** // should perhaps throw an exception? ret = null; ! return (ret); } --- 105,109 ---- // should perhaps throw an exception? ret = null; ! return (ret); } Index: LinkProcessor.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/LinkProcessor.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** LinkProcessor.java 8 Sep 2003 02:26:33 -0000 1.25 --- LinkProcessor.java 10 Sep 2003 03:38:25 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 65,69 **** { String ret; ! try { --- 65,69 ---- { String ret; ! try { *************** *** 84,88 **** ret = link; } ! return (Translate.decode (ret)); } --- 84,88 ---- ret = link; } ! return (Translate.decode (ret)); } *************** *** 133,137 **** char ch; StringBuffer returnURL; ! index = url.indexOf (' '); if (-1 != index) --- 133,137 ---- char ch; StringBuffer returnURL; ! index = url.indexOf (' '); if (-1 != index) *************** *** 150,157 **** url = returnURL.toString (); } ! return (url); } ! /** * Check if a resource is a valid URL. --- 150,157 ---- url = returnURL.toString (); } ! return (url); } ! /** * Check if a resource is a valid URL. *************** *** 162,166 **** URL url; boolean ret; ! try { --- 162,166 ---- URL url; boolean ret; ! try { *************** *** 172,179 **** ret = false; } ! return (ret); } ! /** * Returns the baseUrl. --- 172,179 ---- ret = false; } ! return (ret); } ! /** * Returns the baseUrl. *************** *** 184,188 **** return baseUrl; } ! /** * Sets the baseUrl. --- 184,188 ---- return baseUrl; } ! /** * Sets the baseUrl. *************** *** 204,207 **** } } ! } --- 204,207 ---- } } ! } Index: NodeIterator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeIterator.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** NodeIterator.java 8 Sep 2003 02:26:33 -0000 1.26 --- NodeIterator.java 10 Sep 2003 03:38:25 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,41 **** */ public boolean hasMoreNodes() throws ParserException; ! /** * Get the next node. --- 37,41 ---- */ public boolean hasMoreNodes() throws ParserException; ! /** * Get the next node. *************** *** 43,46 **** */ public Node nextNode() throws ParserException; ! } --- 43,46 ---- */ public Node nextNode() throws ParserException; ! } Index: NodeList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeList.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** NodeList.java 8 Sep 2003 02:26:33 -0000 1.42 --- NodeList.java 10 Sep 2003 03:38:25 -0000 1.43 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,46 **** private int capacityIncrement; private int numberOfAdjustments; ! public NodeList() { size = 0; --- 42,46 ---- private int capacityIncrement; private int numberOfAdjustments; ! public NodeList() { size = 0; *************** *** 50,56 **** numberOfAdjustments = 0; } ! public void add(Node node) { ! if (size==capacity) adjustVectorCapacity(); nodeData[size++]=node; --- 50,56 ---- numberOfAdjustments = 0; } ! public void add(Node node) { ! if (size==capacity) adjustVectorCapacity(); nodeData[size++]=node; *************** *** 73,77 **** public void prepend(Node node) { ! if (size==capacity) adjustVectorCapacity(); System.arraycopy (nodeData, 0, nodeData, 1, size); --- 73,77 ---- public void prepend(Node node) { ! if (size==capacity) adjustVectorCapacity(); System.arraycopy (nodeData, 0, nodeData, 1, size); *************** *** 92,100 **** return new Node[capacity]; } ! public int size() { return size; } ! public Node elementAt(int i) { return nodeData[i]; --- 92,100 ---- return new Node[capacity]; } ! public int size() { return size; } ! public Node elementAt(int i) { return nodeData[i]; *************** *** 104,116 **** return numberOfAdjustments; } ! 
public SimpleNodeIterator elements() { return new SimpleNodeIterator() { int count = 0; ! public boolean hasMoreNodes() { return count < size; } ! public Node nextNode() { synchronized (NodeList.this) { --- 104,116 ---- return numberOfAdjustments; } ! public SimpleNodeIterator elements() { return new SimpleNodeIterator() { int count = 0; ! public boolean hasMoreNodes() { return count < size; } ! public Node nextNode() { synchronized (NodeList.this) { *************** *** 123,152 **** }; } ! public Node [] toNodeArray() { Node [] nodeArray = newNodeArrayFor(size); ! System.arraycopy(nodeData, 0, nodeArray, 0, size); return nodeArray; } ! public String asString() { StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toPlainTextString()); return buff.toString(); } ! public String asHtml() { StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toHtml()); return buff.toString(); } ! public void remove(int index) { ! System.arraycopy(nodeData, index+1, nodeData, index, size-index-1); ! size--; } ! public void removeAll() { size = 0; --- 123,152 ---- }; } ! public Node [] toNodeArray() { Node [] nodeArray = newNodeArrayFor(size); ! System.arraycopy(nodeData, 0, nodeArray, 0, size); return nodeArray; } ! public String asString() { StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toPlainTextString()); return buff.toString(); } ! public String asHtml() { StringBuffer buff = new StringBuffer(); ! for (int i=0;i<size;i++) ! buff.append(nodeData[i].toHtml()); return buff.toString(); } ! public void remove(int index) { ! System.arraycopy(nodeData, index+1, nodeData, index, size-index-1); ! size--; } ! public void removeAll() { size = 0; *************** *** 156,160 **** numberOfAdjustments = 0; } ! public String toString() { StringBuffer text = new StringBuffer(); --- 156,160 ---- numberOfAdjustments = 0; } ! 
public String toString() { StringBuffer text = new StringBuffer(); *************** *** 172,176 **** return (searchFor (classType, false)); } ! /** * Search for nodes of the given type recursively. --- 172,176 ---- return (searchFor (classType, false)); } ! /** * Search for nodes of the given type recursively. *************** *** 184,188 **** NodeList children; NodeList ret; ! ret = new NodeList (); name = classType.getName (); --- 184,188 ---- NodeList children; NodeList ret; ! ret = new NodeList (); name = classType.getName (); Index: ParserException.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserException.java,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** ParserException.java 8 Sep 2003 02:26:33 -0000 1.23 --- ParserException.java 10 Sep 2003 03:38:25 -0000 1.24 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 46,55 **** super(message); } ! public ParserException(Throwable throwable) { super(throwable); } ! public ParserException(String message, Throwable throwable) { --- 46,55 ---- super(message); } ! public ParserException(Throwable throwable) { super(throwable); } ! public ParserException(String message, Throwable throwable) { Index: ParserFeedback.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserFeedback.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** ParserFeedback.java 8 Sep 2003 02:26:33 -0000 1.24 --- ParserFeedback.java 10 Sep 2003 03:38:25 -0000 1.25 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: ParserUtils.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/ParserUtils.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** ParserUtils.java 8 Sep 2003 02:26:33 -0000 1.28 --- ParserUtils.java 10 Sep 2003 03:38:25 -0000 1.29 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 38,42 **** public class ParserUtils { ! public static String toString(Tag tag) { String tagName = tag.getAttribute(Tag.TAGNAME); --- 38,42 ---- public class ParserUtils { ! public static String toString(Tag tag) { String tagName = tag.getAttribute(Tag.TAGNAME); *************** *** 64,68 **** return tempScanners; } ! 
public static void restoreScanners(NodeReader reader, Map tempScanners) { // Flush the scanners --- 64,68 ---- return tempScanners; } ! public static void restoreScanners(NodeReader reader, Map tempScanners) { // Flush the scanners Index: PeekingIterator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/PeekingIterator.java,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** PeekingIterator.java 8 Sep 2003 02:26:33 -0000 1.14 --- PeekingIterator.java 10 Sep 2003 03:38:25 -0000 1.15 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: SimpleNodeIterator.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/SimpleNodeIterator.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** SimpleNodeIterator.java 8 Sep 2003 02:26:33 -0000 1.28 --- SimpleNodeIterator.java 10 Sep 2003 03:38:25 -0000 1.29 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! 
// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 50,54 **** * Get the next node. * @return The next node in the HTML stream, or null if there are no more ! * nodes. */ public Node nextNode(); --- 50,54 ---- * Get the next node. * @return The next node in the HTML stream, or null if there are no more ! * nodes. */ public Node nextNode(); Index: Translate.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/Translate.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** Translate.java 8 Sep 2003 02:26:33 -0000 1.34 --- Translate.java 10 Sep 2003 03:38:25 -0000 1.35 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,31 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // --- 18,31 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // *************** *** 470,474 **** return (ret); } ! public static String decode (StringBuffer stringBuffer) { return decode(stringBuffer.toString()); --- 470,474 ---- return (ret); } ! public static String decode (StringBuffer stringBuffer) { return decode(stringBuffer.toString()); Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/package.html,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** package.html 8 Sep 2003 02:26:33 -0000 1.13 --- package.html 10 Sep 2003 03:38:25 -0000 1.14 *************** *** 18,22 **** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software --- 18,22 ---- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software *************** *** 25,43 **** For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> <body bgcolor="white"> ! The util package is intended for holding utility classes that dont directly help with the parsing, but can take responsibilities out from some classes. Resuable code which can be reused by many classes, should be located ! in this package. <h2>Related Documentation</h2> --- 25,43 ---- For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> <body bgcolor="white"> ! The util package is intended for holding utility classes that dont directly help with the parsing, but can take responsibilities out from some classes. Resuable code which can be reused by many classes, should be located ! in this package. <h2>Related Documentation</h2> |
From: <der...@us...> - 2003-09-10 03:54:12
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/util/sort Modified Files: Ordered.java Sort.java Sortable.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: Ordered.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/Ordered.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Ordered.java 8 Sep 2003 02:26:33 -0000 1.5 --- Ordered.java 10 Sep 2003 03:38:25 -0000 1.6 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,31 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // --- 18,31 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // *************** *** 62,66 **** * Returns a negative integer, zero, or a positive integer * as this object is less than, equal to, or greater ! * than the second. * <p> * The implementor must ensure that --- 62,66 ---- * Returns a negative integer, zero, or a positive integer * as this object is less than, equal to, or greater ! * than the second. * <p> * The implementor must ensure that *************** *** 68,72 **** * for all x and y. (This implies that <code>x.compare(y)</code> * must throw an exception if and only if <code>y.compare(x)</code> ! * throws an exception.) * <p> * The implementor must also ensure that the relation is transitive: --- 68,72 ---- * for all x and y. (This implies that <code>x.compare(y)</code> * must throw an exception if and only if <code>y.compare(x)</code> ! * throws an exception.) 
* <p> * The implementor must also ensure that the relation is transitive: Index: Sort.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/Sort.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Sort.java 8 Sep 2003 02:26:33 -0000 1.5 --- Sort.java 10 Sep 2003 03:38:25 -0000 1.6 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,31 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // --- 18,31 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // *************** *** 356,360 **** { QuickSort (sortable, sortable.first (), sortable.last ()); ! } /** --- 356,360 ---- { QuickSort (sortable, sortable.first (), sortable.last ()); ! } /** *************** *** 406,410 **** int result; int ret; ! ret = -1; --- 406,410 ---- int result; int ret; ! 
ret = -1; *************** *** 412,416 **** ordered = null; while ((-1 == ret) && (lo <= hi)) ! { half = num / 2; mid = lo + ((0 != (num & 1)) ? half : half - 1); --- 412,416 ---- ordered = null; while ((-1 == ret) && (lo <= hi)) ! { half = num / 2; mid = lo + ((0 != (num & 1)) ? half : half - 1); *************** *** 420,429 **** ret = mid; else if (0 > result) ! { hi = mid - 1; num = ((0 != (num & 1)) ? half : half - 1); } else ! { lo = mid + 1; num = half; --- 420,429 ---- ret = mid; else if (0 > result) ! { hi = mid - 1; num = ((0 != (num & 1)) ? half : half - 1); } else ! { lo = mid + 1; num = half; *************** *** 461,470 **** int result; int ret; ! ret = -1; num = (hi - lo) + 1; while ((-1 == ret) && (lo <= hi)) ! { half = num / 2; mid = lo + ((0 != (num & 1)) ? half : half - 1); --- 461,470 ---- int result; int ret; ! ret = -1; num = (hi - lo) + 1; while ((-1 == ret) && (lo <= hi)) ! { half = num / 2; mid = lo + ((0 != (num & 1)) ? half : half - 1); *************** *** 473,482 **** ret = mid; else if (0 > result) ! { hi = mid - 1; num = ((0 != (num & 1)) ? half : half - 1); } else ! { lo = mid + 1; num = half; --- 473,482 ---- ret = mid; else if (0 > result) ! { hi = mid - 1; num = ((0 != (num & 1)) ? half : half - 1); } else ! { lo = mid + 1; num = half; Index: Sortable.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/Sortable.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** Sortable.java 8 Sep 2003 02:26:33 -0000 1.5 --- Sortable.java 10 Sep 2003 03:38:25 -0000 1.6 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,31 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // --- 18,31 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com ! // ! // This class was contributed by // Derrick Oswald // *************** *** 60,64 **** * @param reuse If this argument is not null, it is an object * acquired from a previous fetch that is no longer needed and ! * may be returned as the result if it makes mores sense to alter * and return it than to fetch or create a new element. That is, the * reuse object is garbage and may be used to avoid allocating a new --- 60,64 ---- * @param reuse If this argument is not null, it is an object * acquired from a previous fetch that is no longer needed and ! * may be returned as the result if it makes mores sense to alter * and return it than to fetch or create a new element. 
That is, the * reuse object is garbage and may be used to avoid allocating a new Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/sort/package.html,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** package.html 8 Sep 2003 02:26:33 -0000 1.4 --- package.html 10 Sep 2003 03:38:25 -0000 1.5 *************** *** 19,23 **** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software --- 19,23 ---- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software *************** *** 26,38 **** For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> <TITLE>Sort Package</TITLE> --- 26,38 ---- For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> <TITLE>Sort Package</TITLE> |
From: <der...@us...> - 2003-09-10 03:54:12
|
Update of /cvsroot/htmlparser/htmlparser/lib In directory sc8-pr-cvs1:/tmp/cvs-serv24483/lib Added Files: checkstyle-all-3.1.jar Log Message: Add style checking target to ant build script: ant checkstyle. It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b} { } }. But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. --- NEW FILE: checkstyle-all-3.1.jar --- (This appears to be a binary file; contents omitted.) |
From: <der...@us...> - 2003-09-10 03:54:11
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests/utilTests Modified Files: AllTests.java BeanTest.java CharacterTranslationTest.java HTMLLinkProcessorTest.java HTMLParserUtilsTest.java HTMLTagParserTest.java NodeListTest.java SortTest.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/AllTests.java,v retrieving revision 1.46 retrieving revision 1.47 diff -C2 -d -r1.46 -r1.47 *** AllTests.java 8 Sep 2003 02:26:31 -0000 1.46 --- AllTests.java 10 Sep 2003 03:38:25 -0000 1.47 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,41 **** * @author: Administrator */ ! public class AllTests extends junit.framework.TestCase { /** --- 37,41 ---- * @author: Administrator */ ! public class AllTests extends junit.framework.TestCase { /** *************** *** 128,131 **** return suite; ! } } --- 128,131 ---- return suite; ! } } Index: BeanTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/BeanTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** BeanTest.java 8 Sep 2003 02:26:32 -0000 1.40 --- BeanTest.java 10 Sep 2003 03:38:25 -0000 1.41 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 64,68 **** ObjectOutputStream oos; byte[] ret; ! bos = new ByteArrayOutputStream (); oos = new ObjectOutputStream (bos); --- 64,68 ---- ObjectOutputStream oos; byte[] ret; ! bos = new ByteArrayOutputStream (); oos = new ObjectOutputStream (bos); *************** *** 73,77 **** return (ret); } ! protected Object unpickle (byte[] data) throws --- 73,77 ---- return (ret); } ! protected Object unpickle (byte[] data) throws *************** *** 87,91 **** ret = ois.readObject (); ois.close (); ! return (ret); } --- 87,91 ---- ret = ois.readObject (); ois.close (); ! return (ret); } *************** *** 100,104 **** PrintWriter out; String string; ! path = System.getProperty ("user.dir"); if (!path.endsWith (File.separator)) --- 100,104 ---- PrintWriter out; String string; ! path = System.getProperty ("user.dir"); if (!path.endsWith (File.separator)) *************** *** 133,142 **** Parser parser; byte[] data; ! 
parser = new Parser (); data = pickle (parser); parser = (Parser)unpickle (data); } ! public void testSerializable () throws --- 133,142 ---- Parser parser; byte[] data; ! parser = new Parser (); data = pickle (parser); parser = (Parser)unpickle (data); } ! public void testSerializable () throws *************** *** 149,153 **** NodeIterator enumeration; byte[] data; ! parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html"); enumeration = parser.elements (); --- 149,153 ---- NodeIterator enumeration; byte[] data; ! parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html"); enumeration = parser.elements (); *************** *** 166,170 **** ((Node)enumeration.nextNode ()).toHtml ()); } ! public void testSerializableScanners () throws --- 166,170 ---- ((Node)enumeration.nextNode ()).toHtml ()); } ! public void testSerializableScanners () throws *************** *** 177,181 **** NodeIterator enumeration; byte[] data; ! parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html"); parser.registerScanners (); --- 177,181 ---- NodeIterator enumeration; byte[] data; ! parser = new Parser ("http://htmlparser.sourceforge.net/test/example.html"); parser.registerScanners (); *************** *** 205,209 **** String text; byte[] data; ! sb = new StringBean (); sb.setURL ("http://htmlparser.sourceforge.net/test/example.html"); --- 205,209 ---- String text; byte[] data; ! sb = new StringBean (); sb.setURL ("http://htmlparser.sourceforge.net/test/example.html"); *************** *** 229,233 **** byte[] data; URL[] links2; ! lb = new LinkBean (); lb.setURL ("http://htmlparser.sourceforge.net/test/example.html"); --- 229,233 ---- byte[] data; URL[] links2; ! lb = new LinkBean (); lb.setURL ("http://htmlparser.sourceforge.net/test/example.html"); *************** *** 247,256 **** } } ! public void testStringBeanListener () { final StringBean sb; final Boolean hit[] = new Boolean[1]; ! 
sb = new StringBean (); hit[0] = Boolean.FALSE; --- 247,256 ---- } } ! public void testStringBeanListener () { final StringBean sb; final Boolean hit[] = new Boolean[1]; ! sb = new StringBean (); hit[0] = Boolean.FALSE; *************** *** 283,287 **** final LinkBean lb; final Boolean hit[] = new Boolean[1]; ! lb = new LinkBean (); hit[0] = Boolean.FALSE; --- 283,287 ---- final LinkBean lb; final Boolean hit[] = new Boolean[1]; ! lb = new LinkBean (); hit[0] = Boolean.FALSE; *************** *** 376,380 **** check (sb, "<html><head></head><body>\t\t x</body></html>", "x"); } ! /** * Test text including a "pre" tag --- 376,380 ---- check (sb, "<html><head></head><body>\t\t x</body></html>", "x"); } ! /** * Test text including a "pre" tag *************** *** 386,390 **** check (sb, "<body><pre>"+sampleCode+"</pre></body>", sampleCode); } ! /** * Test text including a "script" tag --- 386,390 ---- check (sb, "<body><pre>"+sampleCode+"</pre></body>", sampleCode); } ! /** * Test text including a "script" tag *************** *** 393,405 **** StringBean sb; sb = new StringBean (); ! String sampleScript = "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" + "else\r\n" + " document.write ('yyy');\r\n" + "</script>\r\n"; ! check (sb, "<body>"+sampleScript+"</body>", ""); } --- 393,405 ---- StringBean sb; sb = new StringBean (); ! String sampleScript = "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" + "else\r\n" + " document.write ('yyy');\r\n" + "</script>\r\n"; ! check (sb, "<body>"+sampleScript+"</body>", ""); } *************** *** 467,482 **** StringBean sb; sb = new StringBean (); ! String sampleScript = "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" + "else\r\n" + " document.write ('yyy');\r\n" + "</script>\r\n"; ! 
check (sb, "<body><pre>"+sampleScript+"</pre></body>", sampleScript); } ! } --- 467,482 ---- StringBean sb; sb = new StringBean (); ! String sampleScript = "<script language=\"javascript\">\r\n" ! + "if(navigator.appName.indexOf(\"Netscape\") != -1)\r\n" ! + " document.write ('xxx');\r\n" + "else\r\n" + " document.write ('yyy');\r\n" + "</script>\r\n"; ! check (sb, "<body><pre>"+sampleScript+"</pre></body>", sampleScript); } ! } Index: CharacterTranslationTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/CharacterTranslationTest.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** CharacterTranslationTest.java 8 Sep 2003 02:26:32 -0000 1.30 --- CharacterTranslationTest.java 10 Sep 2003 03:38:25 -0000 1.31 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 39,43 **** super (name); } ! public void testInitialCharacterEntityReference () { --- 39,43 ---- super (name); } ! public void testInitialCharacterEntityReference () { Index: HTMLLinkProcessorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/HTMLLinkProcessorTest.java,v retrieving revision 1.44 retrieving revision 1.45 diff -C2 -d -r1.44 -r1.45 *** HTMLLinkProcessorTest.java 8 Sep 2003 02:26:32 -0000 1.44 --- HTMLLinkProcessorTest.java 10 Sep 2003 03:38:25 -0000 1.45 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 46,55 **** public void testIsURL() { String resourceLoc1 = "http://someurl.com"; ! 
String resourceLoc2 = "myfilehttp.dat"; assertTrue(resourceLoc1+" should be a url",LinkProcessor.isURL(resourceLoc1)); ! assertTrue(resourceLoc2+" should not be a url",!LinkProcessor.isURL(resourceLoc2)); String resourceLoc3 = "file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html"; assertTrue(resourceLoc3+" should be a url",LinkProcessor.isURL(resourceLoc3)); ! } --- 46,55 ---- public void testIsURL() { String resourceLoc1 = "http://someurl.com"; ! String resourceLoc2 = "myfilehttp.dat"; assertTrue(resourceLoc1+" should be a url",LinkProcessor.isURL(resourceLoc1)); ! assertTrue(resourceLoc2+" should not be a url",!LinkProcessor.isURL(resourceLoc2)); String resourceLoc3 = "file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html"; assertTrue(resourceLoc3+" should be a url",LinkProcessor.isURL(resourceLoc3)); ! } *************** *** 82,86 **** static final String baseURI = "http://a/b/c/d;p?q"; // the relative URI would be resolved as follows: ! // C.1. Normal Examples // g:h = g:h --- 82,86 ---- static final String baseURI = "http://a/b/c/d;p?q"; // the relative URI would be resolved as follows: ! // C.1. Normal Examples // g:h = g:h *************** *** 196,200 **** assertEquals ("test22 failed", "http://a/g", (new LinkProcessor ()).extract ("../../g", baseURI)); } ! // C.2. Abnormal Examples // Although the following abnormal examples are unlikely to occur in --- 196,200 ---- assertEquals ("test22 failed", "http://a/g", (new LinkProcessor ()).extract ("../../g", baseURI)); } ! // C.2. 
Abnormal Examples // Although the following abnormal examples are unlikely to occur in Index: HTMLParserUtilsTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/HTMLParserUtilsTest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** HTMLParserUtilsTest.java 8 Sep 2003 02:26:32 -0000 1.8 --- HTMLParserUtilsTest.java 10 Sep 2003 03:38:25 -0000 1.9 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: HTMLTagParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/HTMLTagParserTest.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** HTMLTagParserTest.java 8 Sep 2003 02:26:32 -0000 1.43 --- HTMLTagParserTest.java 10 Sep 2003 03:38:25 -0000 1.44 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 46,53 **** tagParser.correctTag(tag); assertStringEquals("Corrected Tag","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",tag.getText()); ! } public void testInsertInvertedCommasCorrectly() { ! 
StringBuffer test = new StringBuffer("a b=c d e = f"); StringBuffer result = tagParser.insertInvertedCommasCorrectly(test); assertStringEquals("Expected Correction","a b=\"c d\" e=\"f\"",result.toString()); --- 46,53 ---- tagParser.correctTag(tag); assertStringEquals("Corrected Tag","font face=\"Arial,helvetica,\" sans-serif=\"sans-serif\" size=\"2\" color=\"#FFFFFF\"",tag.getText()); ! } public void testInsertInvertedCommasCorrectly() { ! StringBuffer test = new StringBuffer("a b=c d e = f"); StringBuffer result = tagParser.insertInvertedCommasCorrectly(test); assertStringEquals("Expected Correction","a b=\"c d\" e=\"f\"",result.toString()); *************** *** 57,64 **** String test = " fdfdf dfdf "; assertEquals("Expected Pruned string","fdfdf dfdf",TagParser.pruneSpaces(test)); ! } protected void setUp() { ! tagParser = new TagParser(new DefaultParserFeedback()); ! } } --- 57,64 ---- String test = " fdfdf dfdf "; assertEquals("Expected Pruned string","fdfdf dfdf",TagParser.pruneSpaces(test)); ! } protected void setUp() { ! tagParser = new TagParser(new DefaultParserFeedback()); ! } } Index: NodeListTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/NodeListTest.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** NodeListTest.java 8 Sep 2003 02:26:32 -0000 1.16 --- NodeListTest.java 10 Sep 2003 03:38:25 -0000 1.17 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 38,42 **** private NodeList nodeList; private Node[] testNodes; ! public NodeListTest(String name) { super(name); --- 38,42 ---- private NodeList nodeList; private Node[] testNodes; ! public NodeListTest(String name) { super(name); *************** *** 44,50 **** protected void setUp() { ! nodeList = new NodeList(); } ! public void testAddOneItem() { Node node = createHTMLNodeObject(); --- 44,50 ---- protected void setUp() { ! nodeList = new NodeList(); } ! public void testAddOneItem() { Node node = createHTMLNodeObject(); *************** *** 63,77 **** assertTrue("Second Element",node2==nodeList.elementAt(1)); } ! public void testAddTenItems() { ! createTestDataAndPutInVector(10); assertTestDataCouldBeExtractedFromVector(10); } ! public void testAddElevenItems() { createTestDataAndPutInVector(11); assertTestDataCouldBeExtractedFromVector(11); } ! public void testAddThirtyItems() { createTestDataAndPutInVector(30); --- 63,77 ---- assertTrue("Second Element",node2==nodeList.elementAt(1)); } ! public void testAddTenItems() { ! createTestDataAndPutInVector(10); assertTestDataCouldBeExtractedFromVector(10); } ! 
public void testAddElevenItems() { createTestDataAndPutInVector(11); assertTestDataCouldBeExtractedFromVector(11); } ! public void testAddThirtyItems() { createTestDataAndPutInVector(30); *************** *** 79,83 **** assertEquals("Number of Adjustments",1,nodeList.getNumberOfAdjustments()); } ! public void testAddThirtyOneItems() { createTestDataAndPutInVector(31); --- 79,83 ---- assertEquals("Number of Adjustments",1,nodeList.getNumberOfAdjustments()); } ! public void testAddThirtyOneItems() { createTestDataAndPutInVector(31); *************** *** 85,89 **** assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } ! public void testAddFiftyItems() { createTestDataAndPutInVector(50); --- 85,89 ---- assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } ! public void testAddFiftyItems() { createTestDataAndPutInVector(50); *************** *** 91,95 **** assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } ! public void testAddFiftyOneItems() { createTestDataAndPutInVector(51); --- 91,95 ---- assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } ! public void testAddFiftyOneItems() { createTestDataAndPutInVector(51); *************** *** 97,106 **** assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } ! public void testAddTwoHundredItems() { createTestDataAndPutInVector(200); assertEquals("Number of Adjustments",4,nodeList.getNumberOfAdjustments()); } ! public void testElements() throws Exception { createTestDataAndPutInVector(11); --- 97,106 ---- assertEquals("Number of Adjustments",2,nodeList.getNumberOfAdjustments()); } ! public void testAddTwoHundredItems() { createTestDataAndPutInVector(200); assertEquals("Number of Adjustments",4,nodeList.getNumberOfAdjustments()); } ! public void testElements() throws Exception { createTestDataAndPutInVector(11); *************** *** 112,118 **** i++; } ! } ! 
private Node createHTMLNodeObject() { Node node = new AbstractNode(10,20) { --- 112,118 ---- i++; } ! } ! private Node createHTMLNodeObject() { Node node = new AbstractNode(10,20) { *************** *** 122,134 **** public void collectInto(NodeList collectionList, String filter) { } ! public String toHtml() { return null; } ! public String toPlainTextString() { return null; } ! public String toString() { return ""; --- 122,134 ---- public void collectInto(NodeList collectionList, String filter) { } ! public String toHtml() { return null; } ! public String toPlainTextString() { return null; } ! public String toString() { return ""; *************** *** 139,143 **** private void createTestDataAndPutInVector(int nodeCount) { ! testNodes = new AbstractNode[nodeCount]; for (int i=0;i<nodeCount;i++) { testNodes[i]= createHTMLNodeObject(); --- 139,143 ---- private void createTestDataAndPutInVector(int nodeCount) { ! testNodes = new AbstractNode[nodeCount]; for (int i=0;i<nodeCount;i++) { testNodes[i]= createHTMLNodeObject(); *************** *** 151,155 **** } } ! public void testToNodeArray() { createTestDataAndPutInVector(387); --- 151,155 ---- } } ! public void testToNodeArray() { createTestDataAndPutInVector(387); *************** *** 159,163 **** assertNotNull("node "+i+" should not be null",nodes[i]); } ! public void testRemove() { Node node1 = createHTMLNodeObject(); --- 159,163 ---- assertNotNull("node "+i+" should not be null",nodes[i]); } ! public void testRemove() { Node node1 = createHTMLNodeObject(); *************** *** 172,176 **** assertTrue("First Element",node1==nodeList.elementAt(0)); } ! public void testRemoveAll() { Node node1 = createHTMLNodeObject(); --- 172,176 ---- assertTrue("First Element",node1==nodeList.elementAt(0)); } ! public void testRemoveAll() { Node node1 = createHTMLNodeObject(); *************** *** 186,191 **** assertTrue("Second Element",null==nodeList.elementAt(1)); } ! ! 
public static void main(String[] args) { new junit.awtui.TestRunner().start(new String[] {NodeListTest.class.getName()}); --- 186,191 ---- assertTrue("Second Element",null==nodeList.elementAt(1)); } ! ! public static void main(String[] args) { new junit.awtui.TestRunner().start(new String[] {NodeListTest.class.getName()}); Index: SortTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/SortTest.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** SortTest.java 8 Sep 2003 02:26:32 -0000 1.4 --- SortTest.java 10 Sep 2003 03:38:25 -0000 1.5 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 46,50 **** public class SortTest extends TestCase { ! 
/** * Creates a new instance of SortTest --- 46,50 ---- public class SortTest extends TestCase { ! /** * Creates a new instance of SortTest *************** *** 71,75 **** return (mData.compareTo (((Item)o).mData)); } ! public String toString () { --- 71,75 ---- return (mData.compareTo (((Item)o).mData)); } ! public String toString () { *************** *** 86,90 **** { StringTokenizer toks; ! toks = new StringTokenizer (words); outer: --- 86,90 ---- { StringTokenizer toks; ! toks = new StringTokenizer (words); outer: *************** *** 97,101 **** // Sortable interface // ! public int first () { --- 97,101 ---- // Sortable interface // ! public int first () { *************** *** 146,155 **** return ((int)ret); ! } } /** * Test the operation of the static quicksort algorithm. ! */ public void testQuickSort () { --- 146,155 ---- return ((int)ret); ! } } /** * Test the operation of the static quicksort algorithm. ! */ public void testQuickSort () { *************** *** 167,171 **** Sort.QuickSort (words); ! assertEquals ("element 0 wrong ", "Derrick", words[0].mData); assertEquals ("element 1 wrong ", "breast", words[1].mData); --- 167,171 ---- Sort.QuickSort (words); ! assertEquals ("element 0 wrong ", "Derrick", words[0].mData); assertEquals ("element 1 wrong ", "breast", words[1].mData); *************** *** 180,184 **** /** * Test the operation of quicksort on a sortable list. ! */ public void testSortList () { --- 180,184 ---- /** * Test the operation of quicksort on a sortable list. ! */ public void testSortList () { *************** *** 200,209 **** + "raths slithy the the the the toves wabe were", b.toString ()); ! } ! /** * Test the operation of quicksort on a vector of ordered items. ! */ public void testSortVector () { --- 200,209 ---- + "raths slithy the the the the toves wabe were", b.toString ()); ! } ! /** * Test the operation of quicksort on a vector of ordered items. ! 
*/ public void testSortVector () { *************** *** 217,221 **** if (f.isFile ()) directory.addElement (f); ! } Sort.QuickSort (directory); --- 217,221 ---- if (f.isFile ()) directory.addElement (f); ! } Sort.QuickSort (directory); Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/utilTests/package.html,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** package.html 8 Sep 2003 02:26:32 -0000 1.13 --- package.html 10 Sep 2003 03:38:25 -0000 1.14 *************** *** 18,22 **** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software --- 18,22 ---- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software *************** *** 25,41 **** For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> <body bgcolor="white"> ! This package contains testcases for the util package. <h2>Related Documentation</h2> --- 25,41 ---- For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> <body bgcolor="white"> ! This package contains testcases for the util package. <h2>Related Documentation</h2> |
From: <der...@us...> - 2003-09-10 03:54:10
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests/visitorsTests Modified Files: AllTests.java CompositeTagFindingVisitorTest.java HtmlPageTest.java LinkFindingVisitorTest.java NodeVisitorTest.java StringFindingVisitorTest.java TagFindingVisitorTest.java TextExtractingVisitorTest.java UrlModifyingVisitorTest.java Log Message: Add style checking target to ant build script: ant checkstyle. It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/AllTests.java,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** AllTests.java 8 Sep 2003 02:26:32 -0000 1.34 --- AllTests.java 10 Sep 2003 03:38:25 -0000 1.35 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 31,43 **** import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { public AllTests(String name) { super(name); } ! public static TestSuite suite() { TestSuite suite = new TestSuite("Visitor Tests"); ! suite.addTestSuite(CompositeTagFindingVisitorTest.class); suite.addTestSuite(HtmlPageTest.class); --- 31,43 ---- import junit.framework.TestSuite; ! 
public class AllTests extends junit.framework.TestCase { public AllTests(String name) { super(name); } ! public static TestSuite suite() { TestSuite suite = new TestSuite("Visitor Tests"); ! suite.addTestSuite(CompositeTagFindingVisitorTest.class); suite.addTestSuite(HtmlPageTest.class); *************** *** 50,54 **** return suite; ! } /** --- 50,54 ---- return suite; ! } /** *************** *** 94,103 **** * from http://www.mail-archive.com/commons-user%40jakarta.apache.org/msg02958.html * ! * The problem is within the UI test runners of JUnit. They bring ! * with them a custom classloader, which causes the ! * LogConfigurationException. Unfortunately Log4j doesn't work * either. ! * ! * Solution: Disable "Reload classes every run" or start JUnit with * command line option -noloading before the name of the Testsuite. */ --- 94,103 ---- * from http://www.mail-archive.com/commons-user%40jakarta.apache.org/msg02958.html * ! * The problem is within the UI test runners of JUnit. They bring ! * with them a custom classloader, which causes the ! * LogConfigurationException. Unfortunately Log4j doesn't work * either. ! * ! * Solution: Disable "Reload classes every run" or start JUnit with * command line option -noloading before the name of the Testsuite. */ Index: CompositeTagFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/CompositeTagFindingVisitorTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** CompositeTagFindingVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.7 --- CompositeTagFindingVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: HtmlPageTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/HtmlPageTest.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** HtmlPageTest.java 8 Sep 2003 02:26:32 -0000 1.10 --- HtmlPageTest.java 10 Sep 2003 03:38:25 -0000 1.11 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,44 **** public class HtmlPageTest extends ParserTestCase { ! private static final String SIMPLE_PAGE = "<html>" + "<head>" + --- 40,44 ---- public class HtmlPageTest extends ParserTestCase { ! private static final String SIMPLE_PAGE = "<html>" + "<head>" + *************** *** 50,54 **** "</html>"; ! private static final String PAGE_WITH_TABLE = "<html>" + "<head>" + --- 50,54 ---- "</html>"; ! private static final String PAGE_WITH_TABLE = "<html>" + "<head>" + *************** *** 65,69 **** "</body>" + "</html>"; ! public HtmlPageTest(String name) { super(name); --- 65,69 ---- "</body>" + "</html>"; ! public HtmlPageTest(String name) { super(name); *************** *** 71,75 **** public void testCreateSimplePage() throws Exception { ! createParser( SIMPLE_PAGE ); --- 71,75 ---- public void testCreateSimplePage() throws Exception { ! createParser( SIMPLE_PAGE ); *************** *** 93,99 **** ); } ! public void testCreatePageWithTables() throws Exception { ! createParser( PAGE_WITH_TABLE ); --- 93,99 ---- ); } ! public void testCreatePageWithTables() throws Exception { ! createParser( PAGE_WITH_TABLE ); *************** *** 112,116 **** "</table>", bodyNodes.asHtml() ! 
); TableTag tables [] = page.getTables(); assertEquals("number of tables",1,tables.length); --- 112,116 ---- "</table>", bodyNodes.asHtml() ! ); TableTag tables [] = page.getTables(); assertEquals("number of tables",1,tables.length); Index: LinkFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/LinkFindingVisitorTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** LinkFindingVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.7 --- LinkFindingVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 33,37 **** public class LinkFindingVisitorTest extends ParserTestCase { ! 
private String html = "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, <A href=\"http://www.industriallogic.com\">Industrial Logic</a></BODY></HTML>"; --- 33,37 ---- public class LinkFindingVisitorTest extends ParserTestCase { ! private String html = "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, <A href=\"http://www.industriallogic.com\">Industrial Logic</a></BODY></HTML>"; *************** *** 48,51 **** assertEquals("Link Count",1,visitor.getCount()); } ! } --- 48,51 ---- assertEquals("Link Count",1,visitor.getCount()); } ! } Index: NodeVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/NodeVisitorTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** NodeVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.7 --- NodeVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,46 **** super(name); } ! public void testVisitTag() throws Exception { ParameterVisitor visitor = new ParameterVisitor(); --- 42,46 ---- super(name); } ! public void testVisitTag() throws Exception { ParameterVisitor visitor = new ParameterVisitor(); *************** *** 52,71 **** ); parser.visitAllNodesWith(visitor); ! assertEquals("value of key1","value1",visitor.getValue("key1")); assertEquals("value of key2","value2",visitor.getValue("key2")); } ! class ParameterVisitor extends NodeVisitor { Map paramsMap = new HashMap(); String lastKeyVisited; ! public String getValue(String key) { return (String)paramsMap.get(key); } ! public void visitStringNode(StringNode stringNode) { paramsMap.put(lastKeyVisited,stringNode.getText()); } ! public void visitTag(Tag tag) { if (tag.getTagName().equals("PARAM")) { --- 52,71 ---- ); parser.visitAllNodesWith(visitor); ! assertEquals("value of key1","value1",visitor.getValue("key1")); assertEquals("value of key2","value2",visitor.getValue("key2")); } ! class ParameterVisitor extends NodeVisitor { Map paramsMap = new HashMap(); String lastKeyVisited; ! public String getValue(String key) { return (String)paramsMap.get(key); } ! public void visitStringNode(StringNode stringNode) { paramsMap.put(lastKeyVisited,stringNode.getText()); } ! 
public void visitTag(Tag tag) { if (tag.getTagName().equals("PARAM")) { Index: StringFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/StringFindingVisitorTest.java,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** StringFindingVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.9 --- StringFindingVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.10 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 33,45 **** public class StringFindingVisitorTest extends ParserTestCase { ! private static final String HTML = "<HTML><HEAD><TITLE>This is the Title</TITLE>" + "</HEAD><BODY>Hello World, this is an excellent parser</BODY></HTML>"; ! ! 
private static final String HTML_TO_SEARCH = "<HTML><HEAD><TITLE>test</TITLE></HEAD>\n"+ "<BODY><H1>This is a test page</H1>\n"+ "Writing tests is good for code. Testing is a good\n"+ ! "philosophy. Test driven development is even better.\n"; public StringFindingVisitorTest(String name) { --- 33,45 ---- public class StringFindingVisitorTest extends ParserTestCase { ! private static final String HTML = "<HTML><HEAD><TITLE>This is the Title</TITLE>" + "</HEAD><BODY>Hello World, this is an excellent parser</BODY></HTML>"; ! ! private static final String HTML_TO_SEARCH = "<HTML><HEAD><TITLE>test</TITLE></HEAD>\n"+ "<BODY><H1>This is a test page</H1>\n"+ "Writing tests is good for code. Testing is a good\n"+ ! "philosophy. Test driven development is even better.\n"; public StringFindingVisitorTest(String name) { *************** *** 53,57 **** assertTrue("Hello found", visitor.stringWasFound()); } ! public void testStringNotFound() throws Exception { createParser(HTML); --- 53,57 ---- assertTrue("Hello found", visitor.stringWasFound()); } ! public void testStringNotFound() throws Exception { createParser(HTML); *************** *** 60,64 **** assertTrue("industrial logic should not have been found", !visitor.stringWasFound()); } ! public void testStringInTagNotFound() throws Exception { createParser(HTML); --- 60,64 ---- assertTrue("industrial logic should not have been found", !visitor.stringWasFound()); } ! public void testStringInTagNotFound() throws Exception { createParser(HTML); *************** *** 74,78 **** assertTrue("text should be found", visitor.stringWasFound()); } ! public void testStringFoundCount() throws Exception { createParser(HTML); --- 74,78 ---- assertTrue("text should be found", visitor.stringWasFound()); } ! public void testStringFoundCount() throws Exception { createParser(HTML); *************** *** 80,84 **** parser.visitAllNodesWith(visitor); assertEquals("# times 'is' was found", 2, visitor.stringFoundCount()); ! 
visitor = new StringFindingVisitor("and"); parser.visitAllNodesWith(visitor); --- 80,84 ---- parser.visitAllNodesWith(visitor); assertEquals("# times 'is' was found", 2, visitor.stringFoundCount()); ! visitor = new StringFindingVisitor("and"); parser.visitAllNodesWith(visitor); *************** *** 94,98 **** } ! ! } --- 94,98 ---- } ! ! } Index: TagFindingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/TagFindingVisitorTest.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** TagFindingVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.10 --- TagFindingVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.11 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 35,39 **** public class TagFindingVisitorTest extends ParserTestCase { ! private String html = "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD>" + "<BODY>Hello World, this is an excellent parser</BODY>" + --- 35,39 ---- public class TagFindingVisitorTest extends ParserTestCase { ! private String html = "<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD>" + "<BODY>Hello World, this is an excellent parser</BODY>" + *************** *** 45,49 **** super(name); } ! public void setUp() { createParser(html); --- 45,49 ---- super(name); } ! public void setUp() { createParser(html); *************** *** 61,67 **** assertEquals("LI tags found", 2, visitor.getTagCount(0)); } ! public void testMultipleTags() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor( new String [] { --- 61,67 ---- assertEquals("LI tags found", 2, visitor.getTagCount(0)); } ! public void testMultipleTags() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor( new String [] { *************** *** 77,81 **** public void testEndTags() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor( new String [] { --- 77,81 ---- public void testEndTags() throws Exception { ! TagFindingVisitor visitor = new TagFindingVisitor( new String [] { Index: TextExtractingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/TextExtractingVisitorTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** TextExtractingVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.7 --- TextExtractingVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 48,52 **** ); } ! public void testSimpleVisitWithRegisteredScanners() throws Exception { createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>"); --- 48,52 ---- ); } ! public void testSimpleVisitWithRegisteredScanners() throws Exception { createParser("<HTML><HEAD><TITLE>Hello World</TITLE></HEAD></HTML>"); *************** *** 60,64 **** ); } ! public void testVisitHtmlWithSpecialChars() throws Exception { createParser("<BODY>Hello World </BODY>"); --- 60,64 ---- ); } ! public void testVisitHtmlWithSpecialChars() throws Exception { createParser("<BODY>Hello World </BODY>"); *************** *** 71,75 **** ); } ! public void testVisitHtmlWithPreTags() throws Exception { createParser( --- 71,75 ---- ); } ! 
public void testVisitHtmlWithPreTags() throws Exception { createParser( Index: UrlModifyingVisitorTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/visitorsTests/UrlModifyingVisitorTest.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** UrlModifyingVisitorTest.java 8 Sep 2003 02:26:32 -0000 1.7 --- UrlModifyingVisitorTest.java 10 Sep 2003 03:38:25 -0000 1.8 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 34,44 **** public class UrlModifyingVisitorTest extends ParserTestCase { ! private static final String HTML_WITH_LINK = "<HTML><BODY>" + "<A HREF=\"mylink.html\"><IMG SRC=\"mypic.jpg\">" + "</A><IMG SRC=\"mysecondimage.gif\">" + "</BODY></HTML>"; ! ! 
private static final String MODIFIED_HTML = "<HTML><BODY>" + "<A HREF=\"localhost://mylink.html\">" + --- 34,44 ---- public class UrlModifyingVisitorTest extends ParserTestCase { ! private static final String HTML_WITH_LINK = "<HTML><BODY>" + "<A HREF=\"mylink.html\"><IMG SRC=\"mypic.jpg\">" + "</A><IMG SRC=\"mysecondimage.gif\">" + "</BODY></HTML>"; ! ! private static final String MODIFIED_HTML = "<HTML><BODY>" + "<A HREF=\"localhost://mylink.html\">" + *************** *** 46,58 **** "<IMG SRC=\"localhost://mysecondimage.gif\">" + "</BODY></HTML>"; ! public UrlModifyingVisitorTest(String name) { super(name); } ! public void testUrlModificationWithVisitor() throws Exception { Parser parser = Parser.createParser(HTML_WITH_LINK); ! UrlModifyingVisitor visitor = ! new UrlModifyingVisitor(parser, "localhost://"); parser.visitAllNodesWith(visitor); assertStringEquals("Expected HTML", --- 46,58 ---- "<IMG SRC=\"localhost://mysecondimage.gif\">" + "</BODY></HTML>"; ! public UrlModifyingVisitorTest(String name) { super(name); } ! public void testUrlModificationWithVisitor() throws Exception { Parser parser = Parser.createParser(HTML_WITH_LINK); ! UrlModifyingVisitor visitor = ! new UrlModifyingVisitor(parser, "localhost://"); parser.visitAllNodesWith(visitor); assertStringEquals("Expected HTML", |
From: <der...@us...> - 2003-09-10 03:54:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests/temporaryFailures Modified Files: AttributeParserTest.java TagParserTest.java Log Message: Add style checking target to ant build script: ant checkstyle. It uses a jar from http://checkstyle.sourceforge.net, which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b}, { } }. But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: AttributeParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures/AttributeParserTest.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** AttributeParserTest.java 8 Sep 2003 02:26:31 -0000 1.12 --- AttributeParserTest.java 10 Sep 2003 03:38:25 -0000 1.13 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. !
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 47,51 **** private Tag tag; private Hashtable table; ! public AttributeParserTest(String name) { super(name); --- 47,51 ---- private Tag tag; private Hashtable table; ! public AttributeParserTest(String name) { super(name); *************** *** 55,65 **** parser = new AttributeParser(); } ! public void getParameterTableFor(String tagContents) { tag = new Tag(new TagData(0,0,tagContents,"")); table = parser.parseAttributes(tag.getText ()); ! } ! public void testParseParameters() { getParameterTableFor("a b = \"c\""); --- 55,65 ---- parser = new AttributeParser(); } ! public void getParameterTableFor(String tagContents) { tag = new Tag(new TagData(0,0,tagContents,"")); table = parser.parseAttributes(tag.getText ()); ! } ! public void testParseParameters() { getParameterTableFor("a b = \"c\""); *************** *** 71,78 **** assertEquals("Value","'",table.get("B")); } ! 
public void testParseEmptyValues() { getParameterTableFor("a b = \"\""); ! assertEquals("Value","",table.get("B")); } --- 71,78 ---- assertEquals("Value","'",table.get("B")); } ! public void testParseEmptyValues() { getParameterTableFor("a b = \"\""); ! assertEquals("Value","",table.get("B")); } *************** *** 80,90 **** getParameterTableFor("a b\"c\""); assertEquals("ValueB",null,table.get("B")); ! } ! public void testTwoParams(){ getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\">\n"); assertEquals("Param1","Param1",table.get("NAME")); ! assertEquals("Somik","Somik",table.get("VALUE")); } --- 80,90 ---- getParameterTableFor("a b\"c\""); assertEquals("ValueB",null,table.get("B")); ! } ! public void testTwoParams(){ getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\">\n"); assertEquals("Param1","Param1",table.get("NAME")); ! assertEquals("Somik","Somik",table.get("VALUE")); } *************** *** 92,102 **** getParameterTableFor("PARAM NAME=Param1 VALUE=Somik"); assertEquals("Param1","Param1",table.get("NAME")); ! assertEquals("Somik","Somik",table.get("VALUE")); } ! public void testValueMissing() { getParameterTableFor("INPUT type=\"checkbox\" name=\"Authorize\" value=\"Y\" checked"); assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME)); ! assertEquals("Type","checkbox",table.get("TYPE")); assertEquals("Name","Authorize",table.get("NAME")); assertEquals("Value","Y",table.get("VALUE")); --- 92,102 ---- getParameterTableFor("PARAM NAME=Param1 VALUE=Somik"); assertEquals("Param1","Param1",table.get("NAME")); ! assertEquals("Somik","Somik",table.get("VALUE")); } ! public void testValueMissing() { getParameterTableFor("INPUT type=\"checkbox\" name=\"Authorize\" value=\"Y\" checked"); assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME)); ! assertEquals("Type","checkbox",table.get("TYPE")); assertEquals("Name","Authorize",table.get("NAME")); assertEquals("Value","Y",table.get("VALUE")); *************** *** 105,109 **** /** ! 
* This is a simulation of a bug reported by Dhaval Udani - wherein * a space before the end of the tag causes a problem - there is a key * in the table with just a space in it and an empty value --- 105,109 ---- /** ! * This is a simulation of a bug reported by Dhaval Udani - wherein * a space before the end of the tag causes a problem - there is a key * in the table with just a space in it and an empty value *************** *** 124,130 **** getParameterTableFor("INPUT type="); assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME)); ! assertEquals("Type","",table.get("TYPE")); } ! public void testAttributeWithSpuriousEqualTo() { getParameterTableFor( --- 124,130 ---- getParameterTableFor("INPUT type="); assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME)); ! assertEquals("Type","",table.get("TYPE")); } ! public void testAttributeWithSpuriousEqualTo() { getParameterTableFor( *************** *** 137,141 **** ); } ! public void testQuestionMarksInAttributes() { getParameterTableFor( --- 137,141 ---- ); } ! public void testQuestionMarksInAttributes() { getParameterTableFor( *************** *** 187,191 **** } } ! /** * Test Script in attributes. --- 187,191 ---- } } ! /** * Test Script in attributes. *************** *** 204,208 **** } } ! /** * Test that stand-alone attributes are kept that way, rather than being --- 204,208 ---- } } ! /** * Test that stand-alone attributes are kept that way, rather than being Index: TagParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures/TagParserTest.java,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** TagParserTest.java 8 Sep 2003 02:26:31 -0000 1.10 --- TagParserTest.java 10 Sep 2003 03:38:25 -0000 1.11 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic, Inc. ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic, Inc. ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 82,94 **** private Map results; private int testProgress; ! public TagParserTest(String name) { super(name); } ! public void testTagWithQuotes() throws Exception { ! String testHtml = "<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; ! createParser(testHtml); parseAndAssertNodeCount(1); --- 82,94 ---- private Map results; private int testProgress; ! public TagParserTest(String name) { super(name); } ! public void testTagWithQuotes() throws Exception { ! String testHtml = "<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; ! createParser(testHtml); parseAndAssertNodeCount(1); *************** *** 102,106 **** ); } ! public void testEmptyTag() throws Exception { createParser("<custom/>"); --- 102,106 ---- ); } ! 
public void testEmptyTag() throws Exception { createParser("<custom/>"); *************** *** 116,120 **** ); } ! public void testTagWithCloseTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a>b\">"); --- 116,120 ---- ); } ! public void testTagWithCloseTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a>b\">"); *************** *** 124,128 **** assertStringEquals("attribute","a>b",tag.getAttribute("att")); } ! public void testTagWithOpenTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a<b\">"); --- 124,128 ---- assertStringEquals("attribute","a>b",tag.getAttribute("att")); } ! public void testTagWithOpenTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a<b\">"); *************** *** 251,255 **** } } ! /** * Test multiline split tag like attribute. --- 251,255 ---- } } ! /** * Test multiline split tag like attribute. *************** *** 295,311 **** String testHtml2 = "<a href=\"http://normallink.com/sometext.html\">" + TEST_HTML; ! ParsingThread parsingThread [] = new ParsingThread[100]; results = new HashMap(); testProgress = 0; for (int i=0;i<parsingThread.length;i++) { ! if (i<parsingThread.length/2) ! parsingThread[i] = new ParsingThread(i,testHtml1,parsingThread.length); else ! parsingThread[i] = new ParsingThread(i,testHtml2,parsingThread.length); ! Thread thread = new Thread(parsingThread[i]); thread.start(); } --- 295,311 ---- String testHtml2 = "<a href=\"http://normallink.com/sometext.html\">" + TEST_HTML; ! ParsingThread parsingThread [] = new ParsingThread[100]; results = new HashMap(); testProgress = 0; for (int i=0;i<parsingThread.length;i++) { ! if (i<parsingThread.length/2) ! parsingThread[i] = new ParsingThread(i,testHtml1,parsingThread.length); else ! parsingThread[i] = new ParsingThread(i,testHtml2,parsingThread.length); ! Thread thread = new Thread(parsingThread[i]); thread.start(); } *************** *** 319,323 **** catch (InterruptedException e) { } ! 
} while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { --- 319,323 ---- catch (InterruptedException e) { } ! } while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { *************** *** 348,352 **** parsingThread[i].getLink2().getLink() ); ! } } } --- 348,352 ---- parsingThread[i].getLink2().getLink() ); ! } } } *************** *** 358,362 **** return numThreads * (numThreads - 1) / 2; } ! class ParsingThread implements Runnable { Parser parser; --- 358,362 ---- return numThreads * (numThreads - 1) / 2; } ! class ParsingThread implements Runnable { Parser parser; *************** *** 365,377 **** boolean result; int max; ! ParsingThread(int id, String testHtml, int max) { this.id = id; this.max = max; ! this.parser = Parser.createParser(testHtml); parser.registerScanners(); } ! public void run() { try { --- 365,377 ---- boolean result; int max; ! ParsingThread(int id, String testHtml, int max) { this.id = id; this.max = max; ! this.parser = Parser.createParser(testHtml); parser.registerScanners(); } ! public void run() { try { *************** *** 398,415 **** } } ! public LinkTag getLink1() { return link1; } ! public LinkTag getLink2() { return link2; } ! public boolean passed() { return result; } } ! /** * Test the toHTML method for a standalone attribute. --- 398,415 ---- } } ! public LinkTag getLink1() { return link1; } ! public LinkTag getLink2() { return link2; } ! public boolean passed() { return result; } } ! /** * Test the toHTML method for a standalone attribute. *************** *** 424,428 **** assertStringEquals ("html","<INPUT DISABLED>", html); } ! /** * Test the toHTML method for a missing value attribute. --- 424,428 ---- assertStringEquals ("html","<INPUT DISABLED>", html); } ! /** * Test the toHTML method for a missing value attribute. *************** *** 437,440 **** assertStringEquals ("html","<INPUT DISABLED=>", html); } ! 
} --- 437,440 ---- assertStringEquals ("html","<INPUT DISABLED=>", html); } ! } |
From: <der...@us...> - 2003-09-10 03:49:03
|
Update of /cvsroot/htmlparser/htmlparser In directory sc8-pr-cvs1:/tmp/cvs-serv26771 Added Files: java.header Log Message: Forgot the Java header template file. Oops. --- NEW FILE: java.header --- // HTMLParser Library \$Name: $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright \(C\) \d\d\d\d .* // // Revision Control Information // // \$Source: /cvsroot/htmlparser/htmlparser/java.header,v $ // \$Author: derrickoswald $ // \$Date: 2003/09/10 03:48:58 $ // \$Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or \(at your option\) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/scanners Modified Files: AppletScanner.java BaseHrefScanner.java BodyScanner.java BulletListScanner.java BulletScanner.java CompositeTagScanner.java DivScanner.java DoctypeScanner.java FormScanner.java FrameScanner.java FrameSetScanner.java HeadScanner.java HtmlScanner.java ImageScanner.java InputTagScanner.java JspScanner.java LabelScanner.java LinkScanner.java MetaTagScanner.java OptionTagScanner.java ScriptScanner.java SelectTagScanner.java SpanScanner.java StyleScanner.java TableColumnScanner.java TableRowScanner.java TableScanner.java TagScanner.java TextareaTagScanner.java TitleScanner.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
Index: AppletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/AppletScanner.java,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** AppletScanner.java 8 Sep 2003 02:26:29 -0000 1.30 --- AppletScanner.java 10 Sep 2003 03:38:19 -0000 1.31 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,48 **** public class AppletScanner extends CompositeTagScanner { private static String [] MATCH_STRING = {"APPLET"}; ! public AppletScanner() { super(MATCH_STRING); } ! public AppletScanner(String filter) { super(filter,MATCH_STRING); --- 40,48 ---- public class AppletScanner extends CompositeTagScanner { private static String [] MATCH_STRING = {"APPLET"}; ! public AppletScanner() { super(MATCH_STRING); } ! 
public AppletScanner(String filter) { super(filter,MATCH_STRING); *************** *** 55,59 **** public Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException { ! return new AppletTag(tagData,compositeTagData); } --- 55,59 ---- public Tag createTag(TagData tagData, CompositeTagData compositeTagData) throws ParserException { ! return new AppletTag(tagData,compositeTagData); } Index: BaseHrefScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BaseHrefScanner.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** BaseHrefScanner.java 8 Sep 2003 02:26:29 -0000 1.24 --- BaseHrefScanner.java 10 Sep 2003 03:38:19 -0000 1.25 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 60,64 **** absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); processor.setBaseUrl(absoluteBaseUrl); ! } return new BaseHrefTag(tagData,absoluteBaseUrl); } --- 60,64 ---- absoluteBaseUrl = LinkProcessor.removeLastSlash(baseUrl.trim()); processor.setBaseUrl(absoluteBaseUrl); ! } return new BaseHrefTag(tagData,absoluteBaseUrl); } Index: BodyScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BodyScanner.java,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** BodyScanner.java 8 Sep 2003 02:26:29 -0000 1.16 --- BodyScanner.java 10 Sep 2003 03:38:19 -0000 1.17 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 46,50 **** this(""); } ! public BodyScanner(String filter) { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); --- 46,50 ---- this(""); } ! public BodyScanner(String filter) { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); *************** *** 54,58 **** return MATCH_NAME; } ! public Tag createTag( TagData tagData, --- 54,58 ---- return MATCH_NAME; } ! public Tag createTag( TagData tagData, Index: BulletListScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletListScanner.java,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** BulletListScanner.java 8 Sep 2003 02:26:29 -0000 1.15 --- BulletListScanner.java 10 Sep 2003 03:38:19 -0000 1.16 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** private final static String ENDERS [] = { "BODY", "HTML" }; private Stack ulli = new Stack(); ! public BulletListScanner(Parser parser) { this("",parser); --- 43,47 ---- private final static String ENDERS [] = { "BODY", "HTML" }; private Stack ulli = new Stack(); ! public BulletListScanner(Parser parser) { this("",parser); *************** *** 61,65 **** return MATCH_STRING; } ! public void beforeScanningStarts() { ulli.push(this); --- 61,65 ---- return MATCH_STRING; } ! public void beforeScanningStarts() { ulli.push(this); Index: BulletScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/BulletScanner.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** BulletScanner.java 8 Sep 2003 02:26:29 -0000 1.20 --- BulletScanner.java 10 Sep 2003 03:38:19 -0000 1.21 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,48 **** * [1] A <ul> can have <li> under it<br> * [2] A <li> can have <ul> under it<br> ! * [3] A <li> cannot have <li> under it<br> * <p> ! * These rules are implemented easily through the shared stack. */ public class BulletScanner extends CompositeTagScanner { --- 42,48 ---- * [1] A <ul> can have <li> under it<br> * [2] A <li> can have <ul> under it<br> ! * [3] A <li> cannot have <li> under it<br> * <p> ! * These rules are implemented easily through the shared stack. */ public class BulletScanner extends CompositeTagScanner { *************** *** 51,55 **** private final static String END_TAG_ENDERS [] = { "UL" }; private Stack ulli; ! public BulletScanner(Stack ulli) { this("",ulli); --- 51,55 ---- private final static String END_TAG_ENDERS [] = { "UL" }; private Stack ulli; ! public BulletScanner(Stack ulli) { this("",ulli); *************** *** 69,73 **** return MATCH_STRING; } ! /** * This is the logic that decides when a bullet tag can be allowed --- 69,73 ---- return MATCH_STRING; } ! /** * This is the logic that decides when a bullet tag can be allowed *************** *** 79,83 **** ulli.pop(); return true; ! } else return false; } --- 79,83 ---- ulli.pop(); return true; ! 
} else return false; } Index: CompositeTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v retrieving revision 1.65 retrieving revision 1.66 diff -C2 -d -r1.65 -r1.66 *** CompositeTagScanner.java 8 Sep 2003 02:26:29 -0000 1.65 --- CompositeTagScanner.java 10 Sep 2003 03:38:19 -0000 1.66 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 47,52 **** * <li>Tags which will trigger a match</li> * <li>Tags which when encountered before a legal end tag, should force a correction</li> ! * <li>Preventing more tags of its own type to appear as children ! * </ul> * Here are examples of each:<BR> * <B>Tags which will trigger a match</B> --- 47,52 ---- * <li>Tags which will trigger a match</li> * <li>Tags which when encountered before a legal end tag, should force a correction</li> ! * <li>Preventing more tags of its own type to appear as children ! * </ul> * Here are examples of each:<BR> * <B>Tags which will trigger a match</B> *************** *** 77,81 **** * <B>Preventing children of same type</B> * This is useful when you know that a certain tag can never hold children of its own type. ! * e.g. <FORM> can never have more form tags within it. If it does, it is an error and should * be corrected. The default behavior is to allow nesting. * <pre> --- 77,81 ---- * <B>Preventing children of same type</B> * This is useful when you know that a certain tag can never hold children of its own type. ! * e.g. <FORM> can never have more form tags within it. If it does, it is an error and should * be corrected. The default behavior is to allow nesting. 
* <pre> *************** *** 98,102 **** private Set endTagEnderSet; private boolean balance_quotes; ! public CompositeTagScanner(String [] nameOfTagToMatch) { this(nameOfTagToMatch,new String[] {}); --- 98,102 ---- private Set endTagEnderSet; private boolean balance_quotes; ! public CompositeTagScanner(String [] nameOfTagToMatch) { this(nameOfTagToMatch,new String[] {}); *************** *** 120,126 **** public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); --- 120,126 ---- public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, boolean allowSelfChildren) { this(filter,nameOfTagToMatch,tagEnders,new String[] {}, allowSelfChildren); *************** *** 128,134 **** public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, String [] endTagEnders, boolean allowSelfChildren) --- 128,134 ---- public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, String [] endTagEnders, boolean allowSelfChildren) *************** *** 159,165 **** */ public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, String [] endTagEnders, boolean allowSelfChildren, --- 159,165 ---- */ public CompositeTagScanner( ! String filter, ! String [] nameOfTagToMatch, ! String [] tagEnders, String [] endTagEnders, boolean allowSelfChildren, *************** *** 178,182 **** public Tag scan(Tag tag, String url, NodeReader reader,String currLine) throws ParserException { ! CompositeTagScannerHelper helper = new CompositeTagScannerHelper(this,tag,url,reader,currLine,balance_quotes); return helper.scan(); --- 178,182 ---- public Tag scan(Tag tag, String url, NodeReader reader,String currLine) throws ParserException { ! 
CompositeTagScannerHelper helper = new CompositeTagScannerHelper(this,tag,url,reader,currLine,balance_quotes); return helper.scan(); *************** *** 186,196 **** * Override this method if you wish to create any data structures or do anything * before the start of the scan. This is just after a tag has triggered the scanner ! * but before the scanner begins its processing. */ public void beforeScanningStarts() { } ! /** ! * This method is called everytime a child to the composite is found. It is useful when we * need to store special children seperately. Though, all children are collected anyway into a node list. */ --- 186,196 ---- * Override this method if you wish to create any data structures or do anything * before the start of the scan. This is just after a tag has triggered the scanner ! * but before the scanner begins its processing. */ public void beforeScanningStarts() { } ! /** ! * This method is called everytime a child to the composite is found. It is useful when we * need to store special children seperately. Though, all children are collected anyway into a node list. */ *************** *** 205,211 **** public final boolean isTagToBeEndedFor(Tag tag) { ! boolean isEndTag = tag instanceof EndTag; String tagName = tag.getTagName(); ! if ( ( isEndTag && endTagEnderSet.contains(tagName)) || (!isEndTag && tagEnderSet.contains(tagName)) --- 205,211 ---- public final boolean isTagToBeEndedFor(Tag tag) { ! boolean isEndTag = tag instanceof EndTag; String tagName = tag.getTagName(); ! if ( ( isEndTag && endTagEnderSet.contains(tagName)) || (!isEndTag && tagEnderSet.contains(tagName)) *************** *** 219,223 **** /** ! * Override this method to implement scanner logic that determines if the current scanner is * to be allowed. This is useful when there are rules which dont allow recursive tags of the same * type. @see BulletScanner --- 219,223 ---- /** ! 
* Override this method to implement scanner logic that determines if the current scanner is * to be allowed. This is useful when there are rules which dont allow recursive tags of the same * type. @see BulletScanner Index: DivScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DivScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** DivScanner.java 8 Sep 2003 02:26:29 -0000 1.28 --- DivScanner.java 10 Sep 2003 03:38:19 -0000 1.29 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 36,40 **** public class DivScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"DIV"}; ! 
public DivScanner() { this(""); --- 36,40 ---- public class DivScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"DIV"}; ! public DivScanner() { this(""); Index: DoctypeScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/DoctypeScanner.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** DoctypeScanner.java 8 Sep 2003 02:26:29 -0000 1.25 --- DoctypeScanner.java 10 Sep 2003 03:38:19 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 57,61 **** protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! 
String tagContents = tag.getText(); tagContents=tagContents.substring(9,tagContents.length()); tagData.setTagContents(tagContents); --- 57,61 ---- protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { ! String tagContents = tag.getText(); tagContents=tagContents.substring(9,tagContents.length()); tagData.setTagContents(tagContents); Index: FormScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FormScanner.java,v retrieving revision 1.43 retrieving revision 1.44 diff -C2 -d -r1.43 -r1.44 *** FormScanner.java 8 Sep 2003 02:26:29 -0000 1.43 --- FormScanner.java 10 Sep 2003 03:38:19 -0000 1.44 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 51,55 **** { private static final String [] MATCH_ID = { "FORM" }; ! 
public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; private boolean linkScannerAlreadyOpen=false; private static final String [] formTagEnders = {"HTML","BODY" --- 51,55 ---- { private static final String [] MATCH_ID = { "FORM" }; ! public static final String PREVIOUS_DIRTY_LINK_MESSAGE="Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this.."; private boolean linkScannerAlreadyOpen=false; private static final String [] formTagEnders = {"HTML","BODY" *************** *** 73,77 **** parser.addScanner(new OptionTagScanner("-option",stack)); } ! /** * Extract the location of the image, given the string to be parsed, and the url --- 73,77 ---- parser.addScanner(new OptionTagScanner("-option",stack)); } ! /** * Extract the location of the image, given the string to be parsed, and the url *************** *** 145,149 **** public boolean evaluate(String s, TagScanner previousOpenScanner) { if (previousOpenScanner instanceof LinkScanner) { ! linkScannerAlreadyOpen = true; StringBuffer msg= new StringBuffer(); msg.append("<"); --- 145,149 ---- public boolean evaluate(String s, TagScanner previousOpenScanner) { if (previousOpenScanner instanceof LinkScanner) { ! linkScannerAlreadyOpen = true; StringBuffer msg= new StringBuffer(); msg.append("<"); *************** *** 153,162 **** feedback.warning(msg.toString()); // This is dirty HTML. Assume the current tag is ! // not a new link tag - but an end tag. This is actually a really wild bug - // Internet Explorer actually parses such tags. // So - we shall then proceed to fool the scanner into sending an endtag of type </A> // For this - set the dirty flag to true and return } ! 
else linkScannerAlreadyOpen = false; return super.evaluate(s, previousOpenScanner); --- 153,162 ---- feedback.warning(msg.toString()); // This is dirty HTML. Assume the current tag is ! // not a new link tag - but an end tag. This is actually a really wild bug - // Internet Explorer actually parses such tags. // So - we shall then proceed to fool the scanner into sending an endtag of type </A> // For this - set the dirty flag to true and return } ! else linkScannerAlreadyOpen = false; return super.evaluate(s, previousOpenScanner); *************** *** 166,170 **** throws ParserException { String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) --- 166,170 ---- throws ParserException { String formUrl = extractFormLocn(compositeTagData.getStartTag(),tagData.getUrlBeingParsed()); ! if (formUrl!=null && formUrl.length()>0) compositeTagData.getStartTag().setAttribute("ACTION",formUrl); if (!stack.empty () && (this == stack.peek ())) Index: FrameScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameScanner.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** FrameScanner.java 8 Sep 2003 02:26:29 -0000 1.27 --- FrameScanner.java 10 Sep 2003 03:38:19 -0000 1.28 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 80,87 **** String msg; if (tag!=null) msg = tag.getText(); else msg = "null"; ! throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); } } ! --- 80,87 ---- String msg; if (tag!=null) msg = tag.getText(); else msg = "null"; ! throw new ParserException("HTMLFrameScanner.extractFrameLocn() : Error in extracting frame location from tag "+msg,e); } } ! *************** *** 102,106 **** String frameUrl = extractFrameLocn(tag,url); String frameName = extractFrameName(tag,url); ! return new FrameTag(tagData,frameUrl,frameName); } --- 102,106 ---- String frameUrl = extractFrameLocn(tag,url); String frameName = extractFrameName(tag,url); ! return new FrameTag(tagData,frameUrl,frameName); } Index: FrameSetScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/FrameSetScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** FrameSetScanner.java 8 Sep 2003 02:26:29 -0000 1.26 --- FrameSetScanner.java 10 Sep 2003 03:38:19 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 48,52 **** { private static final String MATCH_NAME [] = {"FRAMESET"}; ! public FrameSetScanner() { --- 48,52 ---- { private static final String MATCH_NAME [] = {"FRAMESET"}; ! public FrameSetScanner() { Index: HeadScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HeadScanner.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** HeadScanner.java 8 Sep 2003 02:26:29 -0000 1.13 --- HeadScanner.java 10 Sep 2003 03:38:19 -0000 1.14 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! 
// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 45,49 **** this(""); } ! public HeadScanner(String filter) { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); --- 45,49 ---- this(""); } ! public HeadScanner(String filter) { super(filter,MATCH_NAME,ENDERS,END_TAG_ENDERS,false); *************** *** 53,57 **** return MATCH_NAME; } ! public Tag createTag( TagData tagData, --- 53,57 ---- return MATCH_NAME; } ! public Tag createTag( TagData tagData, Index: HtmlScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/HtmlScanner.java,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** HtmlScanner.java 8 Sep 2003 02:26:29 -0000 1.28 --- HtmlScanner.java 10 Sep 2003 03:38:19 -0000 1.29 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 36,40 **** public class HtmlScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"HTML"}; ! public HtmlScanner() { this(""); --- 36,40 ---- public class HtmlScanner extends CompositeTagScanner { private static String MATCH_STRING [] = {"HTML"}; ! 
public HtmlScanner() { this(""); Index: ImageScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/ImageScanner.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** ImageScanner.java 8 Sep 2003 02:26:29 -0000 1.26 --- ImageScanner.java 10 Sep 2003 03:38:19 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 41,45 **** import org.htmlparser.util.ParserUtils; /** ! * Scans for the Image Tag. This is a subclass of TagScanner, and is called using a * variant of the template method. If the evaluate() method returns true, that means the * given string contains an image tag. Extraction is done by the scan method thereafter --- 41,45 ---- import org.htmlparser.util.ParserUtils; /** ! 
* Scans for the Image Tag. This is a subclass of TagScanner, and is called using a * variant of the template method. If the evaluate() method returns true, that means the * given string contains an image tag. Extraction is done by the scan method thereafter *************** *** 60,65 **** } /** ! * Overriding the constructor to accept the filter ! */ public ImageScanner(String filter,LinkProcessor processor) { --- 60,65 ---- } /** ! * Overriding the constructor to accept the filter ! */ public ImageScanner(String filter,LinkProcessor processor) { *************** *** 92,100 **** table = tag.redoParseAttributes(); relativeLink = (String) table.get("SRC"); ! ! } } if (relativeLink==null) return ""; else ! return processor.extract(relativeLink,url); } catch (Exception e) { --- 92,100 ---- table = tag.redoParseAttributes(); relativeLink = (String) table.get("SRC"); ! ! } } if (relativeLink==null) return ""; else ! return processor.extract(relativeLink,url); } catch (Exception e) { *************** *** 102,106 **** } } ! public String [] getID() { String [] ids = new String[1]; --- 102,106 ---- } } ! public String [] getID() { String [] ids = new String[1]; Index: InputTagScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/InputTagScanner.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** InputTagScanner.java 8 Sep 2003 02:26:29 -0000 1.24 --- InputTagScanner.java 10 Sep 2003 03:38:19 -0000 1.25 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,44 **** super(); } ! public InputTagScanner(String filter) { --- 40,44 ---- super(); } ! public InputTagScanner(String filter) { *************** *** 51,55 **** return ids; } ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { --- 51,55 ---- return ids; } ! protected Tag createTag(TagData tagData, Tag tag, String url) throws ParserException { Index: JspScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/JspScanner.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** JspScanner.java 8 Sep 2003 02:26:29 -0000 1.25 --- JspScanner.java 10 Sep 2003 03:38:19 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: LabelScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LabelScanner.java,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** LabelScanner.java 8 Sep 2003 02:26:29 -0000 1.31 --- LabelScanner.java 10 Sep 2003 03:38:19 -0000 1.32 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! 
// // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** super(MATCH_NAME,new String [] {},false); } ! public LabelScanner(String filter) { super(filter,MATCH_NAME,new String [] {},false); --- 43,47 ---- super(MATCH_NAME,new String [] {},false); } ! public LabelScanner(String filter) { super(filter,MATCH_NAME,new String [] {},false); *************** *** 51,55 **** return MATCH_NAME; } ! public Tag createTag( TagData tagData, --- 51,55 ---- return MATCH_NAME; } ! 
public Tag createTag( TagData tagData, Index: LinkScanner.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/LinkScanner.java,v retrieving revision 1.51 retrieving revision 1.52 diff -C2 -d -r1.51 -r1.52 *** LinkScanner.java 8 Sep 2003 02:26:29 -0000 1.51 --- LinkScanner.java 10 Sep 2003 03:38:19 -0000 1.52 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 44,48 **** import org.htmlparser.util.ParserUtils; /** ! * Scans for the Link Tag. This is a subclass of TagScanner, and is called using a * variant of the template method. If the evaluate() method returns true, that means the * given string contains an image tag. Extraction is done by the scan method thereafter --- 44,48 ---- import org.htmlparser.util.ParserUtils; /** ! 
* Scans for the Link Tag. This is a subclass of TagScanner, and is called using a * variant of the template method. If the evaluate() method returns true, that means the * given string contains an image tag. Extraction is done by the scan method thereafter *************** *** 57,61 **** private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! /** * Overriding the default constructor --- 57,61 ---- private final static String ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; private final static String ENDTAG_ENDERS [] = { "TD","TR","FORM","LI","BODY", "HTML" }; ! /** * Overriding the default constructor *************** *** 64,76 **** this(""); } ! /** ! * Overriding the constructor to accept the filter */ public LinkScanner(String filter) { super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); ! processor = new LinkProcessor(); } ! public Tag createTag( TagData tagData, --- 64,76 ---- this(""); } ! /** ! * Overriding the constructor to accept the filter */ public LinkScanner(String filter) { super(filter,MATCH_NAME,ENDERS,ENDTAG_ENDERS, false); ! processor = new LinkProcessor(); } ! public Tag createTag( TagData tagData, *************** *** 85,90 **** mailto = link.indexOf(":"); link = link.substring(mailto+1); ! mailLink = true; ! } int javascript = link.indexOf("javascript:"); boolean javascriptLink = false; --- 85,90 ---- mailto = link.indexOf(":"); link = link.substring(mailto+1); ! mailLink = true; ! } int javascript = link.indexOf("javascript:"); boolean javascriptLink = false; *************** *** 92,99 **** link = link.substring(11); // this magic number is "javascript:".length() javascriptLink = true; ! } String accessKey = getAccessKey(compositeTagData.getStartTag()); String myLinkText = compositeTagData.getChildren().toString(); ! 
LinkTag linkTag = new LinkTag( tagData, --- 92,99 ---- link = link.substring(11); // this magic number is "javascript:".length() javascriptLink = true; ! } String accessKey = getAccessKey(compositeTagData.getStartTag()); String myLinkText = compositeTagData.getChildren().toString(); ! LinkTag linkTag = new LinkTag( tagData, *************** *** 110,116 **** return linkTag; } ! /** ! * Template Method, used to decide if this scanner can handle the Link tag type. If * the evaluation returns true, the calling side makes a call to scan(). * @param s The complete text contents of the Tag. --- 110,116 ---- return linkTag; } ! /** ! * Template Method, used to decide if this scanner can handle the Link tag type. If * the evaluation returns true, the calling side makes a call to scan(). * @param s The complete text contents of the Tag. *************** *** 122,126 **** char ch; boolean ret; ! // eat up leading blanks s = absorbLeadingBlanks (s); --- 122,126 ---- char ch; boolean ret; ! // eat up leading blanks s = absorbLeadingBlanks (s); *************** *** 140,145 **** /** ! * Extract the link from the ... [truncated message content] |
From: <der...@us...> - 2003-09-10 03:39:08
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tests Modified Files: AllTests.java AssertXmlEqualsTest.java BadTagIdentifier.java FunctionalTests.java InstanceofPerformanceTest.java LineNumberAssignedByNodeReaderTest.java ParserTest.java ParserTestCase.java PerformanceTest.java package.html Log Message: Add style checking target to ant build script: ant checkstyle. It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, e.g. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, e.g. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, e.g. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. Index: AllTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AllTests.java,v retrieving revision 1.49 retrieving revision 1.50 diff -C2 -d -r1.49 -r1.50 *** AllTests.java 8 Sep 2003 02:26:30 -0000 1.49 --- AllTests.java 10 Sep 2003 03:38:23 -0000 1.50 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 31,35 **** import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { --- 31,35 ---- import junit.framework.TestSuite; ! public class AllTests extends junit.framework.TestCase { *************** *** 115,119 **** suite.addTest(LineNumberAssignedByNodeReaderTest.suite()); return suite; ! } } --- 115,119 ---- suite.addTest(LineNumberAssignedByNodeReaderTest.suite()); return suite; ! 
} } Index: AssertXmlEqualsTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/AssertXmlEqualsTest.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** AssertXmlEqualsTest.java 8 Sep 2003 02:26:30 -0000 1.11 --- AssertXmlEqualsTest.java 10 Sep 2003 03:38:23 -0000 1.12 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,49 **** super(name); } ! public void testNestedTagWithText() throws Exception { assertXmlEquals("nested with text","<hello> <hi>My name is Nothing</hi></hello>","<hello><hi>My name is Nothing</hi> </hello>"); } ! public void testThreeTagsDifferent() throws Exception { assertXmlEquals("two tags different","<someTag></someTag><someOtherTag>","<someTag/><someOtherTag>"); } ! 
public void testOneTag() throws Exception { assertXmlEquals("one tag","<someTag>","<someTag>"); --- 37,49 ---- super(name); } ! public void testNestedTagWithText() throws Exception { assertXmlEquals("nested with text","<hello> <hi>My name is Nothing</hi></hello>","<hello><hi>My name is Nothing</hi> </hello>"); } ! public void testThreeTagsDifferent() throws Exception { assertXmlEquals("two tags different","<someTag></someTag><someOtherTag>","<someTag/><someOtherTag>"); } ! public void testOneTag() throws Exception { assertXmlEquals("one tag","<someTag>","<someTag>"); *************** *** 57,77 **** assertXmlEquals("two tags different","<someTag></someTag>","<someTag/>"); } ! public void testTwoTagsDifferent2() throws Exception { assertXmlEquals("two tags different","<someTag/>","<someTag></someTag>"); } ! public void testTwoTagsWithSameAttributes() throws Exception { assertXmlEquals("attributes","<tag name=\"John\" age=\"22\" sex=\"M\"/>","<tag sex=\"M\" name=\"John\" age=\"22\"/>"); } ! public void testTagWithText() throws Exception { assertXmlEquals("text","<hello> My name is Nothing</hello>","<hello>My name is Nothing </hello>"); } ! public void testStringWithLineBreaks() throws Exception { assertXmlEquals("string with line breaks","testing & refactoring","testing &\nrefactoring"); } ! public static TestSuite suite() { TestSuite suite = new TestSuite("XML Tests"); --- 57,77 ---- assertXmlEquals("two tags different","<someTag></someTag>","<someTag/>"); } ! public void testTwoTagsDifferent2() throws Exception { assertXmlEquals("two tags different","<someTag/>","<someTag></someTag>"); } ! public void testTwoTagsWithSameAttributes() throws Exception { assertXmlEquals("attributes","<tag name=\"John\" age=\"22\" sex=\"M\"/>","<tag sex=\"M\" name=\"John\" age=\"22\"/>"); } ! public void testTagWithText() throws Exception { assertXmlEquals("text","<hello> My name is Nothing</hello>","<hello>My name is Nothing </hello>"); } ! 
public void testStringWithLineBreaks() throws Exception { assertXmlEquals("string with line breaks","testing & refactoring","testing &\nrefactoring"); } ! public static TestSuite suite() { TestSuite suite = new TestSuite("XML Tests"); Index: BadTagIdentifier.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/BadTagIdentifier.java,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** BadTagIdentifier.java 8 Sep 2003 02:26:30 -0000 1.11 --- BadTagIdentifier.java 10 Sep 2003 03:38:23 -0000 1.12 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 38,53 **** } ! public static void main(String[] args) throws Exception { ! BadTagIdentifier badTags = new BadTagIdentifier(); badTags.identify("http://www.amazon.com"); } ! ! 
private void identify(String url) throws Exception{ ! String [] tagsBeingChecked = {"TABLE","DIV","SPAN"}; ! Parser parser = new Parser(url); --- 38,53 ---- } ! public static void main(String[] args) throws Exception { ! BadTagIdentifier badTags = new BadTagIdentifier(); badTags.identify("http://www.amazon.com"); } ! ! private void identify(String url) throws Exception{ ! String [] tagsBeingChecked = {"TABLE","DIV","SPAN"}; ! Parser parser = new Parser(url); *************** *** 63,67 **** tagFinder.getEndTagCount(i)); } ! } } --- 63,67 ---- tagFinder.getEndTagCount(i)); } ! } } Index: FunctionalTests.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/FunctionalTests.java,v retrieving revision 1.42 retrieving revision 1.43 diff -C2 -d -r1.42 -r1.43 *** FunctionalTests.java 8 Sep 2003 02:26:30 -0000 1.42 --- FunctionalTests.java 10 Sep 2003 03:38:23 -0000 1.43 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 56,60 **** /** * Based on a suspected bug report by Annette Doyle, ! * to check if the no of image tags are correctly * identified by the parser */ --- 56,60 ---- /** * Based on a suspected bug report by Annette Doyle, ! * to check if the no of image tags are correctly * identified by the parser */ *************** *** 65,74 **** try { int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); } catch (ParserException e) { throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); } ! } --- 65,74 ---- try { int parserImgTagCount = countImageTagsWithHTMLParser(); ! assertEquals("Image Tag Count",imgTagCount,parserImgTagCount); } catch (ParserException e) { throw new ParserException("Error thrown in call to countImageTagsWithHTMLParser()",e); } ! } *************** *** 79,83 **** InputStream is = url.openStream(); BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); imgTagCount = countImageTagsWithoutHTMLParser(reader); is.close(); --- 79,83 ---- InputStream is = url.openStream(); BufferedReader reader; ! reader = new BufferedReader(new InputStreamReader(is)); imgTagCount = countImageTagsWithoutHTMLParser(reader); is.close(); *************** *** 100,105 **** node = (Node)e.nextNode(); if (node instanceof ImageTag) { ! parserImgTagCount++; ! } } return parserImgTagCount; --- 100,105 ---- node = (Node)e.nextNode(); if (node instanceof ImageTag) { ! parserImgTagCount++; ! 
} } return parserImgTagCount; Index: InstanceofPerformanceTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/InstanceofPerformanceTest.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** InstanceofPerformanceTest.java 8 Sep 2003 02:26:30 -0000 1.13 --- InstanceofPerformanceTest.java 10 Sep 2003 03:38:23 -0000 1.14 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 56,60 **** } } ! public void doInstanceofTest(long [] time,int index, long numTimes) { System.out.println("doInstanceofTest("+index+")"); --- 56,60 ---- } } ! 
public void doInstanceofTest(long [] time,int index, long numTimes) { System.out.println("doInstanceofTest("+index+")"); *************** *** 62,66 **** for (long i=0;i<numTimes;i++) { for (Enumeration e = formChildren.elements();e.hasMoreElements();) { ! Node node = (Node)e.nextElement(); } } --- 62,66 ---- for (long i=0;i<numTimes;i++) { for (Enumeration e = formChildren.elements();e.hasMoreElements();) { ! Node node = (Node)e.nextElement(); } } *************** *** 68,72 **** time[index] = end-start; } ! public void doGetTypeTest(long [] time,int index, long numTimes) { System.out.println("doGetTypeTest("+index+")"); --- 68,72 ---- time[index] = end-start; } ! public void doGetTypeTest(long [] time,int index, long numTimes) { System.out.println("doGetTypeTest("+index+")"); *************** *** 83,95 **** public void perform() { int numTimes = 30; ! long time1[] = new long[numTimes], time2[] = new long[numTimes]; ! for (int i=0;i<numTimes;i++) doInstanceofTest(time1,i,i*10000); ! for (int i=0;i<numTimes;i++) doGetTypeTest(time2,i,i*10000); ! print(time1,time2); } --- 83,95 ---- public void perform() { int numTimes = 30; ! long time1[] = new long[numTimes], time2[] = new long[numTimes]; ! for (int i=0;i<numTimes;i++) doInstanceofTest(time1,i,i*10000); ! for (int i=0;i<numTimes;i++) doGetTypeTest(time2,i,i*10000); ! print(time1,time2); } *************** *** 101,105 **** } public static void main(String [] args) throws Exception { ! InstanceofPerformanceTest test = new InstanceofPerformanceTest(); test.setUp(); --- 101,105 ---- } public static void main(String [] args) throws Exception { ! 
InstanceofPerformanceTest test = new InstanceofPerformanceTest(); test.setUp(); Index: LineNumberAssignedByNodeReaderTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/LineNumberAssignedByNodeReaderTest.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** LineNumberAssignedByNodeReaderTest.java 8 Sep 2003 02:26:30 -0000 1.20 --- LineNumberAssignedByNodeReaderTest.java 10 Sep 2003 03:38:23 -0000 1.21 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! 
// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 49,59 **** super(name); } ! /** * Test to ensure that the <code>Tag</code> being created by the * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. * @throws ParserException if there is a problem parsing the test data ! */ public void testLineNumbers() throws ParserException { testLineNumber("<Custom/>", 1, 0, 1, 1); --- 49,59 ---- super(name); } ! /** * Test to ensure that the <code>Tag</code> being created by the * <code>CompositeTagScanner</code> has the correct startLine and endLine ! * information in the <code>TagData</code> it is constructed with. * @throws ParserException if there is a problem parsing the test data ! */ public void testLineNumbers() throws ParserException { testLineNumber("<Custom/>", 1, 0, 1, 1); *************** *** 93,97 **** ); } ! 
/** * Helper method to ensure that the <code>Tag</code> being created by the --- 93,97 ---- ); } ! /** * Helper method to ensure that the <code>Tag</code> being created by the *************** *** 100,108 **** * @param xml String containing HTML or XML to parse, containing a Custom tag * @param numNodes int number of expected nodes returned by parser ! * @param useNode int index of the node to test (should be of type CustomTag) * @param startLine int the expected start line number of the tag * @param endLine int the expected end line number of the tag * @throws ParserException if there is an exception during parsing ! */ private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { createParser(xml); --- 100,108 ---- * @param xml String containing HTML or XML to parse, containing a Custom tag * @param numNodes int number of expected nodes returned by parser ! * @param useNode int index of the node to test (should be of type CustomTag) * @param startLine int the expected start line number of the tag * @param endLine int the expected end line number of the tag * @throws ParserException if there is an exception during parsing ! */ private void testLineNumber(String xml, int numNodes, int useNode, int expectedStartLine, int expectedEndLine) throws ParserException { createParser(xml); *************** *** 113,117 **** assertEquals("start line", expectedStartLine, tag.tagData.getStartLine()); assertEquals("end line", expectedEndLine, tag.tagData.getEndLine()); ! } --- 113,117 ---- assertEquals("start line", expectedStartLine, tag.tagData.getStartLine()); assertEquals("end line", expectedEndLine, tag.tagData.getEndLine()); ! 
} Index: ParserTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** ParserTest.java 8 Sep 2003 02:26:30 -0000 1.40 --- ParserTest.java 10 Sep 2003 03:38:23 -0000 1.41 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 106,111 **** } assertEquals("There should be "+cnt+" nodes identified (second call to parser.elements())",cnt,i); ! } ! /** * Test the Parser(URLConnection) constructor. --- 106,111 ---- } assertEquals("There should be "+cnt+" nodes identified (second call to parser.elements())",cnt,i); ! } ! /** * Test the Parser(URLConnection) constructor. 
*************** *** 115,135 **** * <form NAME="SearchQuick" method="POST" action="cp_search_response-e.asp" * onSubmit="return runSubmit();"> ! * * <!-- begin test hidden field code --> * <input TYPE="Hidden" NAME="app_language" value="english"> ! * * <input TYPE="Hidden" NAME="app_response_start_row_number" value="1"> * <input TYPE="Hidden" NAME="app_response_rows_max" value="9"> ! * * <input TYPE="Hidden" NAME="app_source" value="quick"> * <input TYPE="Hidden" NAME="query_source" value="q"> ! * * <input TYPE="Hidden" NAME="name" value> * <input TYPE="Hidden" NAME="postal_code" value> * <input TYPE="Hidden" NAME="directory_area_name" value> ! * * <input TYPE="Hidden" NAME="delivery_mode" value> * <input TYPE="Hidden" NAME="Suffix" value> ! * * <input TYPE="Hidden" NAME="street_direction" value> * <input TYPE="Hidden" NAME="installation_type" value> --- 115,135 ---- * <form NAME="SearchQuick" method="POST" action="cp_search_response-e.asp" * onSubmit="return runSubmit();"> ! * * <!-- begin test hidden field code --> * <input TYPE="Hidden" NAME="app_language" value="english"> ! * * <input TYPE="Hidden" NAME="app_response_start_row_number" value="1"> * <input TYPE="Hidden" NAME="app_response_rows_max" value="9"> ! * * <input TYPE="Hidden" NAME="app_source" value="quick"> * <input TYPE="Hidden" NAME="query_source" value="q"> ! * * <input TYPE="Hidden" NAME="name" value> * <input TYPE="Hidden" NAME="postal_code" value> * <input TYPE="Hidden" NAME="directory_area_name" value> ! * * <input TYPE="Hidden" NAME="delivery_mode" value> * <input TYPE="Hidden" NAME="Suffix" value> ! * * <input TYPE="Hidden" NAME="street_direction" value> * <input TYPE="Hidden" NAME="installation_type" value> *************** *** 137,147 **** * <input TYPE="Hidden" NAME="installation_name" value> * <input TYPE="Hidden" NAME="unit_number" value> ! * * <input TYPE="Hidden" NAME="app_state" value="production"> * <!-- end test hidden field code --> ! 
* * <p> * <table border="0" cellpadding="0" width="90%" cellspacing="0"> ! * * <tr> * <td class="tbltitle"> Street Number: </td> --- 137,147 ---- * <input TYPE="Hidden" NAME="installation_name" value> * <input TYPE="Hidden" NAME="unit_number" value> ! * * <input TYPE="Hidden" NAME="app_state" value="production"> * <!-- end test hidden field code --> ! * * <p> * <table border="0" cellpadding="0" width="90%" cellspacing="0"> ! * * <tr> * <td class="tbltitle"> Street Number: </td> *************** *** 150,154 **** * </tr> * <tr> ! * * <td> * <input type="text" name="street_number" size="10" maxlength="10"> --- 150,154 ---- * </tr> * <tr> ! * * <td> * <input type="text" name="street_number" size="10" maxlength="10"> *************** *** 160,164 **** * <td><input type="text" name="test" size="10" maxlength="30"></td> * </tr> ! * * </table> * <p> --- 160,164 ---- * <td><input type="text" name="test" size="10" maxlength="30"></td> * </tr> ! * * </table> * <p> *************** *** 171,175 **** * Province: * </td> ! * * </tr> * <tr> --- 171,175 ---- * Province: * </td> ! * * </tr> * <tr> *************** *** 180,184 **** * <select size="1" name="prov"> * <option selected value="NULL">Select</option><option value="AB">AB - Alberta</option><option value="BC">BC - British Columbia</option><option value="MB">MB - Manitoba</option><option value="NB">NB - New Brunswick</option><option value="NL">NL - Newfoundland and Labrador</option><option value="NS">NS - Nova Scotia</option><option value="NT">NT - Northwest Territories</option><option value="NU">NU - Nunavut</option><option value="ON">ON - Ontario</option><option value="PE">PE - Prince Edward Island</option><option value="QC">QC - Quebec</option><option value="SK">SK - Saskatchewan</option><option value="YT">YT - Yukon</option> ! 
* * </select> * </td> --- 180,184 ---- * <select size="1" name="prov"> * <option selected value="NULL">Select</option><option value="AB">AB - Alberta</option><option value="BC">BC - British Columbia</option><option value="MB">MB - Manitoba</option><option value="NB">NB - New Brunswick</option><option value="NL">NL - Newfoundland and Labrador</option><option value="NS">NS - Nova Scotia</option><option value="NT">NT - Northwest Territories</option><option value="NU">NU - Nunavut</option><option value="ON">ON - Ontario</option><option value="PE">PE - Prince Edward Island</option><option value="QC">QC - Quebec</option><option value="SK">SK - Saskatchewan</option><option value="YT">YT - Yukon</option> ! * * </select> * </td> *************** *** 308,312 **** } assertTrue("POST operation failed.", pass); ! } /** --- 308,312 ---- } assertTrue("POST operation failed.", pass); ! } /** *************** *** 322,326 **** int i; NodeIterator enumeration; ! path = System.getProperty ("user.dir"); if (!path.endsWith (File.separator)) --- 322,326 ---- int i; NodeIterator enumeration; ! path = System.getProperty ("user.dir"); if (!path.endsWith (File.separator)) *************** *** 390,394 **** Parser parser; NodeIterator enumeration; ! try { --- 390,394 ---- Parser parser; NodeIterator enumeration; ! try { *************** *** 415,419 **** int i; Node[] nodes; ! parser = new Parser(url); i = 0; --- 415,419 ---- int i; Node[] nodes; ! parser = new Parser(url); i = 0; *************** *** 437,441 **** Parser parser; String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html"; ! parser = new Parser(url); for (NodeIterator e = parser.elements();e.hasMoreNodes();) --- 437,441 ---- Parser parser; String url = "http://htmlparser.sourceforge.net/test/DoublequotedCharset.html"; ! 
parser = new Parser(url); for (NodeIterator e = parser.elements();e.hasMoreNodes();) *************** *** 457,461 **** Parser parser; String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html"; ! parser = new Parser(url); for (NodeIterator e = parser.elements();e.hasMoreNodes();) --- 457,461 ---- Parser parser; String url = "http://htmlparser.sourceforge.net/test/SinglequotedCharset.html"; ! parser = new Parser(url); for (NodeIterator e = parser.elements();e.hasMoreNodes();) *************** *** 543,554 **** } catch (ParserException e) { ! } } ! public void testURLWithSpaces() throws ParserException{ Parser parser; String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! parser = new Parser(url); Node node [] = new AbstractNode[30]; --- 543,554 ---- } catch (ParserException e) { ! } } ! public void testURLWithSpaces() throws ParserException{ Parser parser; String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; ! parser = new Parser(url); Node node [] = new AbstractNode[30]; *************** *** 557,561 **** node[i] = e.nextNode(); i++; ! } assertEquals("Expected nodes",12,i); --- 557,561 ---- node[i] = e.nextNode(); i++; ! } assertEquals("Expected nodes",12,i); *************** *** 611,615 **** "</script></font>\n"+ "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ! ); parser.registerScanners(); NodeList collectionList = new NodeList(); --- 611,615 ---- "</script></font>\n"+ "<p><font size=-2>©2002 Google</font><font size=-2> - Searching 3,083,324,652 web pages</font></center></body></html>\n" ! ); parser.registerScanners(); NodeList collectionList = new NodeList(); *************** *** 667,671 **** "</tr></table></div>\n"+ "</body>\n"+ ! "</html>"); parser.registerScanners(); NodeList collectionList = new NodeList(); --- 667,671 ---- "</tr></table></div>\n"+ "</body>\n"+ ! 
"</html>"); parser.registerScanners(); NodeList collectionList = new NodeList(); *************** *** 688,692 **** ); parser.registerScanners(); ! parser.removeScanner(new FormScanner("",parser)); Map scanners = parser.getScanners(); TagScanner scanner = (TagScanner)scanners.get("FORM"); --- 688,692 ---- ); parser.registerScanners(); ! parser.removeScanner(new FormScanner("",parser)); Map scanners = parser.getScanners(); TagScanner scanner = (TagScanner)scanners.get("FORM"); *************** *** 714,718 **** } } ! /** * See bug #729368 Embedded quote and split tag --- 714,718 ---- } } ! /** * See bug #729368 Embedded quote and split tag Index: ParserTestCase.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/ParserTestCase.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** ParserTestCase.java 8 Sep 2003 02:26:30 -0000 1.26 --- ParserTestCase.java 10 Sep 2003 03:38:23 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! 
// ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 49,53 **** public class ParserTestCase extends TestCase { ! static boolean mCaseInsensitiveComparisons = true; protected Parser parser; --- 49,53 ---- public class ParserTestCase extends TestCase { ! static boolean mCaseInsensitiveComparisons = true; protected Parser parser; *************** *** 55,59 **** protected int nodeCount; protected NodeReader reader; ! public ParserTestCase(String name) { super(name); --- 55,59 ---- protected int nodeCount; protected NodeReader reader; ! public ParserTestCase(String name) { super(name); *************** *** 97,102 **** node = new AbstractNode[numNodes]; } ! ! public void assertStringEquals(String message, String expected, String actual) { String mismatchInfo = ""; --- 97,102 ---- node = new AbstractNode[numNodes]; } ! ! public void assertStringEquals(String message, String expected, String actual) { String mismatchInfo = ""; *************** *** 114,124 **** mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) expected.charAt(i)); } ! } ! for (int i = 0; i < expected.length(); i++) { if ( ! (expected.length() != actual.length() && ( ! i >= (expected.length()-1 ) || i >= (actual.length()-1 ) ) --- 114,124 ---- mismatchInfo += ("\nPosition : " + i + " , Code = " + (int) expected.charAt(i)); } ! } ! for (int i = 0; i < expected.length(); i++) { if ( ! (expected.length() != actual.length() && ( ! i >= (expected.length()-1 ) || i >= (actual.length()-1 ) ) *************** *** 129,162 **** StringBuffer errorMsg = new StringBuffer(); errorMsg.append( ! message +mismatchInfo + " \nMismatch of strings at char posn " + i + ! " \n\nString Expected upto mismatch = " + ! expected.substring(0, i) + ! " \n\nString Actual upto mismatch = " + actual.substring(0, i) ); ! if (i<expected.length()) errorMsg.append( ! 
" \n\nString Expected MISMATCH CHARACTER = "+ expected.charAt(i) + ", code = " + (int) expected.charAt(i) ! ); if (i<actual.length()) errorMsg.append( ! " \n\nString Actual MISMATCH CHARACTER = " + ! actual.charAt(i) + ", code = " + (int) actual.charAt(i) ! ); ! errorMsg.append( ! " \n\n**** COMPLETE STRING EXPECTED ****\n" + ! expected + " \n\n**** COMPLETE STRING ACTUAL***\n" + actual ); fail(errorMsg.toString()); } ! } ! } public void parseNodes() throws ParserException{ --- 129,162 ---- StringBuffer errorMsg = new StringBuffer(); errorMsg.append( ! message +mismatchInfo + " \nMismatch of strings at char posn " + i + ! " \n\nString Expected upto mismatch = " + ! expected.substring(0, i) + ! " \n\nString Actual upto mismatch = " + actual.substring(0, i) ); ! if (i<expected.length()) errorMsg.append( ! " \n\nString Expected MISMATCH CHARACTER = "+ expected.charAt(i) + ", code = " + (int) expected.charAt(i) ! ); if (i<actual.length()) errorMsg.append( ! " \n\nString Actual MISMATCH CHARACTER = " + ! actual.charAt(i) + ", code = " + (int) actual.charAt(i) ! ); ! errorMsg.append( ! " \n\n**** COMPLETE STRING EXPECTED ****\n" + ! expected + " \n\n**** COMPLETE STRING ACTUAL***\n" + actual ); fail(errorMsg.toString()); } ! } ! } public void parseNodes() throws ParserException{ *************** *** 165,169 **** { node[nodeCount++] = e.nextNode(); ! } } --- 165,169 ---- { node[nodeCount++] = e.nextNode(); ! } } *************** *** 185,189 **** String expectedNodeName = expected.getClass().getName(); String actualNodeName = actual.getClass().getName(); ! displayMessage = "The types did not match: Expected "+ expectedNodeName+" \nbut was "+ --- 185,189 ---- String expectedNodeName = expected.getClass().getName(); String actualNodeName = actual.getClass().getName(); ! displayMessage = "The types did not match: Expected "+ expectedNodeName+" \nbut was "+ *************** *** 192,200 **** assertStringEquals(displayMessage, expectedNodeName, actualNodeName); } ! 
public void assertTagEquals(String displayMessage, Node expected, Node actual) { if (expected instanceof Tag) { Tag expectedTag = (Tag)expected; ! Tag actualTag = (Tag)actual; assertTagNameMatches(displayMessage, expectedTag, actualTag); assertAttributesMatch(displayMessage, expectedTag, actualTag); --- 192,200 ---- assertStringEquals(displayMessage, expectedNodeName, actualNodeName); } ! public void assertTagEquals(String displayMessage, Node expected, Node actual) { if (expected instanceof Tag) { Tag expectedTag = (Tag)expected; ! Tag actualTag = (Tag)actual; assertTagNameMatches(displayMessage, expectedTag, actualTag); assertAttributesMatch(displayMessage, expectedTag, actualTag); *************** *** 211,234 **** assertStringEquals(displayMessage, expectedTagName, actualTagName); } ! public void assertXmlEquals(String displayMessage, String expected, String actual) throws Exception { expected = removeEscapeCharacters(expected); actual = removeEscapeCharacters(actual); ! Parser expectedParser = Parser.createParser(expected); Parser resultParser = Parser.createParser(actual); ! NodeIterator expectedIterator = expectedParser.elements(); NodeIterator actualIterator = resultParser.elements(); displayMessage = createGenericFailureMessage(displayMessage, expected, actual); ! Node nextExpectedNode = null, nextActualNode = null; do { nextExpectedNode = getNextNodeUsing(expectedIterator); nextActualNode = getNextNodeUsing(actualIterator); ! assertStringValueMatches( ! displayMessage, ! nextExpectedNode, nextActualNode ); --- 211,234 ---- assertStringEquals(displayMessage, expectedTagName, actualTagName); } ! public void assertXmlEquals(String displayMessage, String expected, String actual) throws Exception { expected = removeEscapeCharacters(expected); actual = removeEscapeCharacters(actual); ! Parser expectedParser = Parser.createParser(expected); Parser resultParser = Parser.createParser(actual); ! 
NodeIterator expectedIterator = expectedParser.elements(); NodeIterator actualIterator = resultParser.elements(); displayMessage = createGenericFailureMessage(displayMessage, expected, actual); ! Node nextExpectedNode = null, nextActualNode = null; do { nextExpectedNode = getNextNodeUsing(expectedIterator); nextActualNode = getNextNodeUsing(actualIterator); ! assertStringValueMatches( ! displayMessage, ! nextExpectedNode, nextActualNode ); *************** *** 258,269 **** private void assertStringValueMatches( String displayMessage, Node expectedNode,Node actualNode) { ! String expected = expectedNode.toPlainTextString().trim(); String actual = actualNode.toPlainTextString().trim(); expected = expected.replace('\n', ' '); actual = actual.replace('\n',' '); ! displayMessage = "String value mismatch\nEXPECTED:"+expected+"\nACTUAL:"+actual+displayMessage; assertStringEquals(displayMessage,expected,actual); ! } --- 258,269 ---- private void assertStringValueMatches( String displayMessage, Node expectedNode,Node actualNode) { ! String expected = expectedNode.toPlainTextString().trim(); String actual = actualNode.toPlainTextString().trim(); expected = expected.replace('\n', ' '); actual = actual.replace('\n',' '); ! displayMessage = "String value mismatch\nEXPECTED:"+expected+"\nACTUAL:"+actual+displayMessage; assertStringEquals(displayMessage,expected,actual); ! } *************** *** 278,282 **** } while (actualIterator.hasMoreNodes()); ! displayMessage = "Actual had more data than expected\n"+extraTags+displayMessage; fail(displayMessage); --- 278,282 ---- } while (actualIterator.hasMoreNodes()); ! displayMessage = "Actual had more data than expected\n"+extraTags+displayMessage; fail(displayMessage); *************** *** 298,302 **** String currLine = parser.getReader().getCurrentLine(); int pos = parser.getReader().getLastReadPosition(); ! 
currLine = currLine.substring(0,pos+1)+ "</"+tag.getTagName()+">"+ --- 298,302 ---- String currLine = parser.getReader().getCurrentLine(); int pos = parser.getReader().getLastReadPosition(); ! currLine = currLine.substring(0,pos+1)+ "</"+tag.getTagName()+">"+ *************** *** 306,310 **** } } ! --- 306,310 ---- } } ! *************** *** 318,322 **** } } ! private void assertActualTagHasNoExtraAttributes(String displayMessage, Tag expectedTag, Tag actualTag) { Iterator i = actualTag.getAttributes().keySet().iterator(); --- 318,322 ---- } } ! private void assertActualTagHasNoExtraAttributes(String displayMessage, Tag expectedTag, Tag actualTag) { Iterator i = actualTag.getAttributes().keySet().iterator(); *************** *** 324,328 **** String key = (String)i.next(); if (key=="/") continue; ! String expectedValue = expectedTag.getAttribute(key); String actualValue = --- 324,328 ---- String key = (String)i.next(); if (key=="/") continue; ! String expectedValue = expectedTag.getAttribute(key); String actualValue = *************** *** 334,345 **** continue; } ! if (expectedValue==null) fail( "\nActual tag had extra key: "+key+displayMessage ); ! } } ! private void assertAllExpectedTagAttributesFoundInActualTag( String displayMessage, --- 334,345 ---- continue; } ! if (expectedValue==null) fail( "\nActual tag had extra key: "+key+displayMessage ); ! } } ! private void assertAllExpectedTagAttributesFoundInActualTag( String displayMessage, *************** *** 350,354 **** String key = (String)i.next(); if (key=="/") continue; ! String expectedValue = expectedTag.getAttribute(key); String actualValue = --- 350,354 ---- String key = (String)i.next(); if (key=="/") continue; ! String expectedValue = expectedTag.getAttribute(key); String actualValue = *************** *** 360,364 **** continue; } ! assertStringEquals( "\nvalue for key "+key+" in tag "+expectedTag.getTagName()+" expected="+expectedValue+" but was "+actualValue+ --- 360,364 ---- continue; } ! 
assertStringEquals( "\nvalue for key "+key+" in tag "+expectedTag.getTagName()+" expected="+expectedValue+" but was "+actualValue+ *************** *** 380,385 **** public void assertType( ! String message, ! Class expectedType, Object object) { String expectedTypeName = expectedType.getName(); --- 380,385 ---- public void assertType( ! String message, ! Class expectedType, Object object) { String expectedTypeName = expectedType.getName(); *************** *** 392,396 **** actualTypeName+"\n and is :"+((Node)object).toHtml() ); ! } } --- 392,396 ---- actualTypeName+"\n and is :"+((Node)object).toHtml() ); ! } } *************** *** 417,420 **** fail(failMsg.toString()); } ! } } --- 417,420 ---- fail(failMsg.toString()); } ! } } Index: PerformanceTest.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/PerformanceTest.java,v retrieving revision 1.40 retrieving revision 1.41 diff -C2 -d -r1.40 -r1.41 *** PerformanceTest.java 8 Sep 2003 02:26:30 -0000 1.40 --- PerformanceTest.java 10 Sep 2003 03:38:23 -0000 1.41 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 48,52 **** public PerformanceTest(String file, int numTimes) { this.file = file; ! this.numTimes = numTimes; } --- 48,52 ---- public PerformanceTest(String file, int numTimes) { this.file = file; ! this.numTimes = numTimes; } *************** *** 57,61 **** System.out.println("***************************************"); System.out.println("* Test Without Scanners Registered *"); ! System.out.println("***************************************"); for (int i=0;i<=numTimes;i++) { // Create the parser object --- 57,61 ---- System.out.println("***************************************"); System.out.println("* Test Without Scanners Registered *"); ! System.out.println("***************************************"); for (int i=0;i<=numTimes;i++) { // Create the parser object *************** *** 85,89 **** System.out.println("***************************************"); System.out.println("* Test With Scanners Registered *"); ! System.out.println("***************************************"); for (int i=0;i<=numTimes;i++) { // Create the parser object --- 85,89 ---- System.out.println("***************************************"); System.out.println("* Test With Scanners Registered *"); ! System.out.println("***************************************"); for (int i=0;i<=numTimes;i++) { // Create the parser object *************** *** 107,111 **** System.out.println("***************************************"); } ! public static void main(String[] args) { if (args.length<2) { --- 107,111 ---- System.out.println("***************************************"); } ! 
public static void main(String[] args) { if (args.length<2) { Index: package.html =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/package.html,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** package.html 8 Sep 2003 02:26:30 -0000 1.13 --- package.html 10 Sep 2003 03:38:23 -0000 1.14 *************** *** 18,22 **** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software --- 18,22 ---- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. ! You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software *************** *** 25,37 **** For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> --- 25,37 ---- For any questions or suggestions, you can write to me at : Email :so...@in... ! ! Postal Address : Somik Raha Extreme Programmer & Coach Industrial Logic Corporation ! 2583 Cedar Street, Berkeley, CA 94708, USA Website : http://www.industriallogic.com ! --> </head> |
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags In directory sc8-pr-cvs1:/tmp/cvs-serv24483/src/org/htmlparser/tags Modified Files: AppletTag.java BaseHrefTag.java BodyTag.java Bullet.java BulletList.java CompositeTag.java Div.java DoctypeTag.java EndTag.java FormTag.java FrameSetTag.java FrameTag.java HeadTag.java Html.java ImageTag.java InputTag.java JspTag.java LabelTag.java LinkTag.java MetaTag.java OptionTag.java ScriptTag.java SelectTag.java Span.java StyleTag.java TableColumn.java TableRow.java TableTag.java Tag.java TextareaTag.java TitleTag.java package.html Log Message: Add style checking target to ant build script: ant checkstyle It uses a jar from http://checkstyle.sourceforge.net which is dropped in the lib directory. The rules are in the file htmlparser_checks.xml in the src directory. Added lexerapplications package with Tabby as the first app. It performs whitespace manipulation on source files to follow the style rules. This reduced the number of style violations to roughly 14,000. There are a few issues with the style checker that need to be resolved before it should be taken too seriously. For example: It thinks all method arguments should be final, even if they are modified by the code (which the compiler frowns on). It complains about long lines, even when there is no possibility of wrapping the line, i.e. a URL in a comment that's more than 80 characters long. It considers all naked integers as 'magic numbers', even when they are obvious, i.e. the 4 corners of a box. It complains about whitespace following braces, even in array initializers, i.e. X[][] = { {a, b} { } } But it points out some really interesting things, even if you don't agree with the style guidelines, so it's worth a look. 
Index: AppletTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/AppletTag.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** AppletTag.java 8 Sep 2003 02:26:29 -0000 1.26 --- AppletTag.java 10 Sep 2003 03:38:19 -0000 1.27 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 43,47 **** * archive and parameters. */ ! public class AppletTag extends CompositeTag { /** --- 43,47 ---- * archive and parameters. */ ! public class AppletTag extends CompositeTag { /** *************** *** 50,54 **** * @param compositeTagData The data for this composite tag. */ ! 
public AppletTag (TagData tagData,CompositeTagData compositeTagData) { super(tagData,compositeTagData); --- 50,54 ---- * @param compositeTagData The data for this composite tag. */ ! public AppletTag (TagData tagData,CompositeTagData compositeTagData) { super(tagData,compositeTagData); *************** *** 71,75 **** kids = getChildren (); for (int i = 0; i < kids.size (); i++) ! { node = children.elementAt(i); if (node instanceof Tag) --- 71,75 ---- kids = getChildren (); for (int i = 0; i < kids.size (); i++) ! { node = children.elementAt(i); if (node instanceof Tag) *************** *** 87,91 **** } } ! return (ret); } --- 87,91 ---- } } ! return (ret); } *************** *** 108,112 **** return (createAppletParamsTable ()); } ! /** * Get the jar file of the applet. --- 108,112 ---- return (createAppletParamsTable ()); } ! /** * Get the jar file of the applet. *************** *** 117,121 **** return (getAttribute ("ARCHIVE")); } ! /** * Get the code base of the applet. --- 117,121 ---- return (getAttribute ("ARCHIVE")); } ! /** * Get the code base of the applet. *************** *** 142,146 **** * @return An enumeration of the <code>PARAM<code> tag <code>NAME<code> attributes. */ ! public Enumeration getParameterNames () { return (getAppletParams ().keys ()); --- 142,146 ---- * @return An enumeration of the <code>PARAM<code> tag <code>NAME<code> attributes. */ ! public Enumeration getParameterNames () { return (getAppletParams ().keys ()); *************** *** 155,159 **** setAttribute ("CODE", newAppletClass); } ! /** * Set the enclosed <code>PARM<code> children. --- 155,159 ---- setAttribute ("CODE", newAppletClass); } ! /** * Set the enclosed <code>PARM<code> children. *************** *** 169,173 **** String s; TagData tagData; ! kids = getChildren (); // erase appletParams from kids --- 169,173 ---- String s; TagData tagData; ! kids = getChildren (); // erase appletParams from kids *************** *** 183,187 **** i++; } ! 
// add newAppletParams to kids for (Enumeration e = newAppletParams.keys (); e.hasMoreElements (); ) --- 183,187 ---- i++; } ! // add newAppletParams to kids for (Enumeration e = newAppletParams.keys (); e.hasMoreElements (); ) *************** *** 193,201 **** kids.add (new Tag (tagData)); } ! //set kids as new children setChildren (kids); } ! /** * Set the <code>ARCHIVE<code> attribute. --- 193,201 ---- kids.add (new Tag (tagData)); } ! //set kids as new children setChildren (kids); } ! /** * Set the <code>ARCHIVE<code> attribute. *************** *** 206,210 **** setAttribute ("ARCHIVE", newArchive); } ! /** * Set the <code>CODEBASE<code> attribute. --- 206,210 ---- setAttribute ("ARCHIVE", newArchive); } ! /** * Set the <code>CODEBASE<code> attribute. *************** *** 229,233 **** Node node; StringBuffer ret; ! ret = new StringBuffer(500); ret.append ("Applet Tag\n"); --- 229,233 ---- Node node; StringBuffer ret; ! ret = new StringBuffer(500); ret.append ("Applet Tag\n"); Index: BaseHrefTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BaseHrefTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** BaseHrefTag.java 8 Sep 2003 02:26:29 -0000 1.24 --- BaseHrefTag.java 10 Sep 2003 03:38:20 -0000 1.25 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: BodyTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BodyTag.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BodyTag.java 8 Sep 2003 02:26:29 -0000 1.13 --- BodyTag.java 10 Sep 2003 03:38:20 -0000 1.14 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 41,49 **** super(tagData,compositeTagData); } ! public String getBody() { return toPlainTextString(); } ! public String toString() { return "BODY: "+getBody(); --- 41,49 ---- super(tagData,compositeTagData); } ! public String getBody() { return toPlainTextString(); } ! public String toString() { return "BODY: "+getBody(); Index: Bullet.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Bullet.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Bullet.java 8 Sep 2003 02:26:29 -0000 1.13 --- Bullet.java 10 Sep 2003 03:38:20 -0000 1.14 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: BulletList.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/BulletList.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** BulletList.java 8 Sep 2003 02:26:29 -0000 1.13 --- BulletList.java 10 Sep 2003 03:38:20 -0000 1.14 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: CompositeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/CompositeTag.java,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** CompositeTag.java 8 Sep 2003 02:26:29 -0000 1.53 --- CompositeTag.java 10 Sep 2003 03:38:20 -0000 1.54 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 63,67 **** public Node getChild (int index) { ! return (getChildren ().elementAt (index)); } --- 63,67 ---- public Node getChild (int index) { ! return (getChildren ().elementAt (index)); } *************** *** 74,78 **** return (getChildren ().toNodeArray ()); } ! /** * Remove the child at the position given. 
--- 74,78 ---- return (getChildren ().toNodeArray ()); } ! /** * Remove the child at the position given. *************** *** 113,117 **** if (prevNode.elementEnd()>node.elementBegin()) { // Its a new line ! sb.append(Parser.getLineSeparator()); } } --- 113,117 ---- if (prevNode.elementEnd()>node.elementBegin()) { // Its a new line ! sb.append(Parser.getLineSeparator()); } } *************** *** 155,159 **** } } ! if (found) return tag; else --- 155,159 ---- } } ! if (found) return tag; else *************** *** 161,165 **** } ! /** * Searches for any node whose text representation contains the search * string. Collects all such nodes in a NodeList. --- 161,165 ---- } ! /** * Searches for any node whose text representation contains the search * string. Collects all such nodes in a NodeList. *************** *** 182,190 **** for (SimpleNodeIterator e = children();e.hasMoreNodes();) { node = e.nextNode(); ! String nodeTextString = node.toPlainTextString(); if (!caseSensitive) nodeTextString=nodeTextString.toUpperCase(); if (nodeTextString.indexOf(searchString)!=-1) { foundList.add(node); ! } } return foundList; --- 182,190 ---- for (SimpleNodeIterator e = children();e.hasMoreNodes();) { node = e.nextNode(); ! String nodeTextString = node.toPlainTextString(); if (!caseSensitive) nodeTextString=nodeTextString.toUpperCase(); if (nodeTextString.indexOf(searchString)!=-1) { foundList.add(node); ! } } return foundList; *************** *** 193,197 **** /** * Collect all objects that are of a certain type ! * Note that this will not check for parent types, and will not * recurse through child tags * @param classType --- 193,197 ---- /** * Collect all objects that are of a certain type ! * Note that this will not check for parent types, and will not * recurse through child tags * @param classType *************** *** 202,206 **** return (getChildren ().searchFor (classType)); } ! /** * Searches for any node whose text representation contains the search * string. 
Collects all such nodes in a NodeList. --- 202,206 ---- return (getChildren ().searchFor (classType)); } ! /** * Searches for any node whose text representation contains the search * string. Collects all such nodes in a NodeList. *************** *** 220,224 **** /** ! * Returns the node number of the string node containing the * given text. This can be useful to index into the composite tag * and get other children. --- 220,224 ---- /** ! * Returns the node number of the string node containing the * given text. This can be useful to index into the composite tag * and get other children. *************** *** 232,236 **** node = e.nextNode(); if (node.toPlainTextString().toUpperCase().indexOf(text.toUpperCase())!=-1) { ! return loc; } loc++; --- 232,236 ---- node = e.nextNode(); if (node.toPlainTextString().toUpperCase().indexOf(text.toUpperCase())!=-1) { ! return loc; } loc++; *************** *** 238,248 **** return -1; } ! /** * Returns the node number of a child node given the node object. ! * This would typically be used in conjuction with digUpStringNode, ! * after which the string node's parent can be used to find the ! * string node's position. Faster than calling findPositionOf(text) ! * again. Note that the position is at a linear level alone - there * is no recursion in this method. * @param searchNode The child node to find. --- 238,248 ---- return -1; } ! /** * Returns the node number of a child node given the node object. ! * This would typically be used in conjuction with digUpStringNode, ! * after which the string node's parent can be used to find the ! * string node's position. Faster than calling findPositionOf(text) ! * again. Note that the position is at a linear level alone - there * is no recursion in this method. * @param searchNode The child node to find. *************** *** 255,259 **** node = e.nextNode(); if (node==searchNode) { ! return loc; } loc++; --- 255,259 ---- node = e.nextNode(); if (node==searchNode) { ! 
return loc; } loc++; *************** *** 261,265 **** return -1; } ! /** * Get child at given index --- 261,265 ---- return -1; } ! /** * Get child at given index *************** *** 270,274 **** return (getChildren ().elementAt (index)); } ! public void collectInto(NodeList collectionList, String filter) { super.collectInto(collectionList, filter); --- 270,274 ---- return (getChildren ().elementAt (index)); } ! public void collectInto(NodeList collectionList, String filter) { super.collectInto(collectionList, filter); *************** *** 286,290 **** } } ! public String getChildrenHTML() { StringBuffer buff = new StringBuffer(); --- 286,290 ---- } } ! public String getChildrenHTML() { StringBuffer buff = new StringBuffer(); *************** *** 295,301 **** return buff.toString(); } ! public void accept(NodeVisitor visitor) { ! if (visitor.shouldRecurseChildren()) { startTag.accept(visitor); SimpleNodeIterator children = children(); --- 295,301 ---- return buff.toString(); } ! public void accept(NodeVisitor visitor) { ! if (visitor.shouldRecurseChildren()) { startTag.accept(visitor); SimpleNodeIterator children = children(); *************** *** 322,328 **** } ! /** * Finds a string node, however embedded it might be, and returns ! * it. The string node will retain links to its parents, so * further navigation is possible. * @param searchText --- 322,328 ---- } ! /** * Finds a string node, however embedded it might be, and returns ! * it. The string node will retain links to its parents, so * further navigation is possible. * @param searchText *************** *** 341,345 **** StringNode [] nodes = ctag.digupStringNode(searchText); for (int j=0;j<nodes.length;j++) ! stringNodes.add(nodes[j]); } } --- 341,345 ---- StringNode [] nodes = ctag.digupStringNode(searchText); for (int j=0;j<nodes.length;j++) ! 
stringNodes.add(nodes[j]); } } Index: Div.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Div.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** Div.java 8 Sep 2003 02:26:29 -0000 1.13 --- Div.java 10 Sep 2003 03:38:20 -0000 1.14 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: DoctypeTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/DoctypeTag.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** DoctypeTag.java 8 Sep 2003 02:26:29 -0000 1.26 --- DoctypeTag.java 10 Sep 2003 03:38:20 -0000 1.27 *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@ki... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Email : so...@in... --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@ki... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Email : so...@in... Index: EndTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/EndTag.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** EndTag.java 8 Sep 2003 02:26:29 -0000 1.29 --- EndTag.java 10 Sep 2003 03:38:20 -0000 1.30 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 42,46 **** public final static int ENDTAG_BEGIN_PARSING_STATE=2; public final static int ENDTAG_FINISHED_PARSING_STATE=3; ! /** * Constructor takes 3 arguments to construct an EndTag object. --- 42,46 ---- public final static int ENDTAG_BEGIN_PARSING_STATE=2; public final static int ENDTAG_FINISHED_PARSING_STATE=3; ! /** * Constructor takes 3 arguments to construct an EndTag object. *************** *** 72,76 **** state = ENDTAG_FINISHED_PARSING_STATE; tagEnd = i; ! } if (state==ENDTAG_BEGIN_PARSING_STATE) { --- 72,76 ---- state = ENDTAG_FINISHED_PARSING_STATE; tagEnd = i; ! } if (state==ENDTAG_BEGIN_PARSING_STATE) { *************** *** 79,83 **** if (state==ENDTAG_WAIT_FOR_SLASH_STATE) { ! if (ch=='/') { state = ENDTAG_BEGIN_PARSING_STATE; --- 79,83 ---- if (state==ENDTAG_WAIT_FOR_SLASH_STATE) { ! if (ch=='/') { state = ENDTAG_BEGIN_PARSING_STATE; *************** *** 97,101 **** { state=ENDTAG_FINISHED_PARSING_STATE; ! tagEnd=i; } } --- 97,101 ---- { state=ENDTAG_FINISHED_PARSING_STATE; ! tagEnd=i; } } *************** *** 108,115 **** tagEnd=i; state=ENDTAG_FINISHED_PARSING_STATE; ! } if (state==ENDTAG_FINISHED_PARSING_STATE) return new EndTag(new TagData(tagBegin,tagEnd,tagContents.toString(),input)); ! 
else return null; } public String toPlainTextString() { --- 108,115 ---- tagEnd=i; state=ENDTAG_FINISHED_PARSING_STATE; ! } if (state==ENDTAG_FINISHED_PARSING_STATE) return new EndTag(new TagData(tagBegin,tagEnd,tagContents.toString(),input)); ! else return null; } public String toPlainTextString() { *************** *** 122,126 **** return "EndTag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } ! public void accept(NodeVisitor visitor) { visitor.visitEndTag(this); --- 122,126 ---- return "EndTag : "+tagContents+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } ! public void accept(NodeVisitor visitor) { visitor.visitEndTag(this); Index: FormTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FormTag.java,v retrieving revision 1.32 retrieving revision 1.33 diff -C2 -d -r1.32 -r1.33 *** FormTag.java 8 Sep 2003 02:26:29 -0000 1.32 --- FormTag.java 10 Sep 2003 03:38:20 -0000 1.33 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 55,59 **** protected NodeList formInputList; private NodeList textAreaList; ! /** * Constructor takes in tagData, compositeTagData, formTagData --- 55,59 ---- protected NodeList formInputList; private NodeList textAreaList; ! /** * Constructor takes in tagData, compositeTagData, formTagData *************** *** 64,68 **** { super(tagData,compositeTagData); ! this.formURL = compositeTagData.getStartTag().getAttribute("ACTION"); this.formName = compositeTagData.getStartTag().getAttribute("NAME"); --- 64,68 ---- { super(tagData,compositeTagData); ! this.formURL = compositeTagData.getStartTag().getAttribute("ACTION"); this.formName = compositeTagData.getStartTag().getAttribute("NAME"); *************** *** 71,75 **** this.textAreaList = compositeTagData.getChildren().searchFor(TextareaTag.class, true); } ! /** * Get the list of input fields. --- 71,75 ---- this.textAreaList = compositeTagData.getChildren().searchFor(TextareaTag.class, true); } ! /** * Get the list of input fields. *************** *** 80,84 **** return formInputList; } ! /** * Get the list of text areas. --- 80,84 ---- return formInputList; } ! /** * Get the list of text areas. *************** *** 89,93 **** return textAreaList; } ! /** * @return String The url of the form --- 89,93 ---- return textAreaList; } ! /** * @return String The url of the form *************** *** 97,101 **** return formURL; } ! /** * Returns the method of the form --- 97,101 ---- return formURL; } ! /** * Returns the method of the form *************** *** 104,113 **** public String getFormMethod() { if(formMethod==null) ! 
{ formMethod = "GET"; } return formMethod; } ! /** * Get the input tag in the form corresponding to the given name --- 104,113 ---- public String getFormMethod() { if(formMethod==null) ! { formMethod = "GET"; } return formMethod; } ! /** * Get the input tag in the form corresponding to the given name *************** *** 128,132 **** return inputTag; else return null; } ! /** * @return String The name of the form --- 128,132 ---- return inputTag; else return null; } ! /** * @return String The name of the form *************** *** 135,141 **** return formName; } ! /** ! * Set the form location. Modification of this element will cause the HTML rendering * to change as well (in a call to toHTML()). * @param formURL The new FORM location --- 135,141 ---- return formName; } ! /** ! * Set the form location. Modification of this element will cause the HTML rendering * to change as well (in a call to toHTML()). * @param formURL The new FORM location *************** *** 167,175 **** } } ! if (found) return textareaTag; else return null; } ! } --- 167,175 ---- } } ! if (found) return textareaTag; else return null; } ! } Index: FrameSetTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameSetTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** FrameSetTag.java 8 Sep 2003 02:26:29 -0000 1.24 --- FrameSetTag.java 10 Sep 2003 03:38:20 -0000 1.25 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 49,53 **** this.frames = compositeTagData.getChildren(); } ! /** * Returns the location of the frame --- 49,53 ---- this.frames = compositeTagData.getChildren(); } ! /** * Returns the location of the frame *************** *** 56,64 **** return frameURL; } ! public String getFrameName() { return frameName; } ! /** * Print the contents of the HTMLImageNode --- 56,64 ---- return frameURL; } ! public String getFrameName() { return frameName; } ! /** * Print the contents of the HTMLImageNode *************** *** 67,71 **** return "FRAME TAG : Image at "+frameURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } ! /** * Returns the frames. --- 67,71 ---- return "FRAME TAG : Image at "+frameURL+"; begins at : "+elementBegin()+"; ends at : "+elementEnd(); } ! /** * Returns the frames. 
Index: FrameTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/FrameTag.java,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** FrameTag.java 8 Sep 2003 02:26:29 -0000 1.24 --- FrameTag.java 10 Sep 2003 03:38:20 -0000 1.25 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com Index: HeadTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/HeadTag.java,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** HeadTag.java 8 Sep 2003 02:26:29 -0000 1.13 --- HeadTag.java 10 Sep 2003 03:38:20 -0000 1.14 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,44 **** super(tagData, compositeTagData); } ! public String toString() { return "HEAD: " + super.toString(); --- 40,44 ---- super(tagData, compositeTagData); } ! public String toString() { return "HEAD: " + super.toString(); Index: Html.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/Html.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** Html.java 8 Sep 2003 02:26:29 -0000 1.25 --- Html.java 10 Sep 2003 03:38:20 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,40 **** super(tagData, compositeTagData); } ! } --- 37,40 ---- super(tagData, compositeTagData); } ! } Index: ImageTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** ImageTag.java 8 Sep 2003 02:26:29 -0000 1.25 --- ImageTag.java 10 Sep 2003 03:38:21 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 33,37 **** /** ! * Identifies an image tag */ public class ImageTag extends Tag --- 33,37 ---- /** ! * Identifies an image tag */ public class ImageTag extends Tag Index: InputTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/InputTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** InputTag.java 8 Sep 2003 02:26:29 -0000 1.25 --- InputTag.java 10 Sep 2003 03:38:21 -0000 1.26 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,41 **** super(tagData); } ! public String toString() { return (ParserUtils.toString(this)); --- 37,41 ---- super(tagData); } ! public String toString() { return (ParserUtils.toString(this)); Index: JspTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/JspTag.java,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** JspTag.java 8 Sep 2003 02:26:29 -0000 1.27 --- JspTag.java 10 Sep 2003 03:38:21 -0000 1.28 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! 
// Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 45,53 **** super(tagData); } ! public String toHtml() { return "<%"+tagContents+"%>"; } ! /** * Print the contents of the remark tag. --- 45,53 ---- super(tagData); } ! public String toHtml() { return "<%"+tagContents+"%>"; } ! /** * Print the contents of the remark tag. Index: LabelTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LabelTag.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** LabelTag.java 8 Sep 2003 02:26:29 -0000 1.26 --- LabelTag.java 10 Sep 2003 03:38:21 -0000 1.27 *************** *** 1,27 **** // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! 
// 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 1,27 ---- // HTMLParser Library v1_4_20030907 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha ! // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. ! // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! // // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 40,48 **** super(tagData, compositeTagData); } ! public String getLabel() { return toPlainTextString(); } ! public String toString() { return "LABEL: "+getLabel(); --- 40,48 ---- super(tagData, compositeTagData); } ! public String getLabel() { return toPlainTextString(); } ! 
public String toString() { return "LABEL: "+getLabel(); Index: LinkTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v retrieving revision 1.33 retrieving revision 1.34 diff -C2 -d -r1.33 -r1.34 *** LinkTag.java 8 Sep 2003 02:26:29 -0000 1.33 --- LinkTag.java 10 Sep 2003 03:38:21 -0000 1.34 *************** *** 11,15 **** // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software --- 11,15 ---- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. ! // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software *************** *** 18,27 **** // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com --- 18,27 ---- // For any questions or suggestions, you can write to me at : // Email :so...@in... ! // ! // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation ! // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com *************** *** 37,41 **** import org.htmlparser.visitors.NodeVisitor; /** ! * Identifies a link tag */ public class LinkTag extends CompositeTag --- 37,41 ---- import org.htmlparser.visitors.NodeVisitor; /** ! * Identifies a link tag */ public class LinkTag extends CompositeTag *************** *** 44,48 **** /** * The URL where the link points to ! 
*/ protected String link; /** --- 44,48 ---- /** * The URL where the link points to ! */ protected String link; /** *************** *** 57,66 **** private boolean javascriptLink; ! /** * Constructor creates an HTMLLinkNode object, which basically stores the location * where the link points to, and the text it contains. * <p> ! * In order to get the contents of the link tag, use the method linkData(), * which returns an enumeration of nodes encapsulated within the link. * <p> --- 57,66 ---- private boolean javascriptLink; ! /** * Constructor creates an HTMLLinkNode object, which basically stores the location * where the link points to, and the text it contains. * <p> ! * In order to get the contents of the link tag, use the method linkData(), * which returns an enumeration of nodes encapsulated within the link. * <p> *************** *** 78,82 **** * </pre> * There is another mechanism available that allows for uniform extraction of images. You could do this to ! * get all images from a web page : * <pre> * Node node; --- 78,82 ---- * </pre> * There is another mechanism available that allows for uniform extraction of images. You could do this to ! * get all images from a web page : * <pre> * Node node; *************** *** 94,98 **** */ public LinkTag(TagData tagData,CompositeTagData compositeTagData,LinkData linkData) { ! super(tagData,compositeTagData); this.link = linkData.getLink(); this.linkText = linkData.getLinkText(); --- 94,98 ---- */ public LinkTag(TagData tagData,CompositeTagData compositeTagData,LinkData linkData) { ! super(tagData,compositeTagData); this.link = linkData.getLink(); this.linkText = linkData.getLinkText(); *************** *** 184,188 **** /** * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.). ! * * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.) */ --- 184,188 ---- /** * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.). ! 
* * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.) */ *************** *** 203,207 **** /** * Set the link as a javascript link. ! * * @param newJavascriptLink flag indicating if the link is a javascript code */ --- 203,207 ---- /** * Set the link as a javascript link. ! * * @param newJavascriptLink flag indicating if the link is a javascript code */ *************** *** 219,227 **** if (accessKey==null) sb.append("null\n"); else sb.append(accessKey+"\n"); ! if (children()!=null) { sb.append(" "+"LinkData\n"); sb.append(" "+"--------\n"); ! Node node; int i = 0; --- 219,227 ---- if (accessKey==null) sb.append("null\n"); else sb.append(accessKey+"\n"); ! if (children()!=null) { sb.append(" "+"LinkData\n"); sb.append(" "+"--------\n"); ! Node node; int i = 0; *************** *** 250,254 **** return children(); } ! public void accept(NodeVisitor visitor) { visitor.visitLinkTag(this); --- 250,254 ---- return children(); } ! public void accept(NodeVisitor visitor) { visitor.visitLinkTag(this); Index: MetaTag.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/MetaTag.java,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** MetaTag.java 8 Sep 2003 02:26:29 -0000 1.25 --- MetaTag.java 10 Sep 2003 03:38:21 -0000 1.26 *************** *** 11,15 **** // ... [truncated message content] |