[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/lexerTests KitTest.java,NONE,1.1 LexerTests.jav
Brought to you by:
derrickoswald
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests
In directory sc8-pr-cvs1:/tmp/cvs-serv9123/tests/lexerTests
Modified Files:
AllTests.java PageIndexTests.java PageTests.java
SourceTests.java
Added Files:
KitTest.java LexerTests.java
Log Message:
Third drop for new i/o subsystem.
--- NEW FILE: KitTest.java ---
/*
* KitTest.java
*
* Created on August 16, 2003, 2:16 PM
*/
package org.htmlparser.tests.lexerTests;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.util.Vector;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.EditorKit;
import javax.swing.text.Element;
import javax.swing.text.ElementIterator;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.HTMLEditorKit.Parser;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import org.htmlparser.Node;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.lexer.nodes.Attribute;
import org.htmlparser.lexer.nodes.TagNode;
import org.htmlparser.util.ParserException;
/**
 * Cross-checks the nodes produced by the htmlparser {@link Lexer} against the
 * tokens reported by the Swing HTML parser.
 * <p>
 * An instance is given the list of lexer nodes for a page and is then used as
 * the {@link ParserCallback} while the Swing parser processes the same input.
 * Each callback searches a small look-ahead window of lexer nodes, starting at
 * the current cursor, for a node that corresponds to the reported token.
 * When one is found the cursor moves just past it; otherwise the mismatch is
 * printed on <code>System.out</code> and the cursor advances by one node.
 *
 * @author derrick
 */
public class KitTest extends ParserCallback
{
    /** Number of lexer nodes, starting at the cursor, searched for a match. */
    private static final int LOOKAHEAD = 25;

    /** Text printed as "ours" when the lexer nodes have been used up. */
    private static final String EXHAUSTED = "<no more nodes>";

    /** The lexer nodes ({@link Node} instances) being compared against. */
    Vector mNodes;

    /** Index into {@link #mNodes} of the next node expected to match. */
    int mIndex;

    /**
     * Creates a new instance of KitTest.
     * @param nodes The nodes produced by the lexer, in document order.
     */
    public KitTest (Vector nodes)
    {
        mNodes = nodes;
        mIndex = 0;
    }

    /**
     * Compares a run of text reported by the Swing parser with the lexer nodes.
     * Character 160 (non-breaking space) is compared as an ordinary space.
     * @param data The characters of the text.
     * @param pos The position reported by the parser (unused).
     */
    public void handleText (char[] data, int pos)
    {
        String theirs;

        theirs = new String (data).replace ((char)160, ' ');
        advance (findText (theirs), theirs);
    }

    /**
     * Compares a comment reported by the Swing parser with the lexer nodes.
     * @param data The characters of the comment.
     * @param pos The position reported by the parser (unused).
     */
    public void handleComment (char[] data, int pos)
    {
        String theirs;

        theirs = new String (data);
        advance (findText (theirs), theirs);
    }

    /**
     * Compares a start tag reported by the Swing parser with the lexer nodes.
     * Only {@link TagNode}s whose first attribute name equals the tag name
     * (ignoring case) are considered a match.
     * @param t The tag.
     * @param a The tag attributes (unused).
     * @param pos The position reported by the parser (unused).
     */
    public void handleStartTag (HTML.Tag t, MutableAttributeSet a, int pos)
    {
        String theirs;

        theirs = t.toString ();
        advance (findTag (theirs, true, false), theirs);
    }

    /**
     * Compares an end tag reported by the Swing parser with the lexer nodes.
     * The first character of the lexer's tag name is dropped before comparing
     * (presumably the leading '/' of an end tag - confirm against the lexer).
     * @param t The tag.
     * @param pos The position reported by the parser (unused).
     */
    public void handleEndTag (HTML.Tag t, int pos)
    {
        String theirs;

        theirs = t.toString ();
        advance (findTag (theirs, false, true), theirs);
    }

    /**
     * Compares a simple tag reported by the Swing parser with the lexer nodes.
     * A lexer tag matches if its name equals the tag name either as is, or
     * with its first character dropped.
     * @param t The tag.
     * @param a The tag attributes (unused).
     * @param pos The position reported by the parser (unused).
     */
    public void handleSimpleTag (HTML.Tag t, MutableAttributeSet a, int pos)
    {
        String theirs;

        theirs = t.toString ();
        advance (findTag (theirs, true, true), theirs);
    }

    /**
     * Parser errors are deliberately ignored.
     * @param errorMsg The error message.
     * @param pos The position of the error.
     */
    public void handleError (String errorMsg, int pos)
    {
        // System.out.println ("******* error @" + pos + " ******** " + errorMsg);
    }

    /**
     * Does nothing; there is no buffered state to flush.
     */
    public void flush () throws BadLocationException
    {
    }

    /**
     * This is invoked after the stream has been parsed, but before
     * <code>flush</code>. <code>eol</code> will be one of \n, \r
     * or \r\n, which ever is encountered the most in parsing the
     * stream.
     *
     * @since 1.3
     */
    public void handleEndOfLineString (String eol)
    {
    }

    /**
     * Searches the look-ahead window for a node whose text equals the given
     * text, ignoring case.
     * @param theirs The text reported by the Swing parser.
     * @return The index of the first matching node, or -1 if there is none.
     */
    private int findText (String theirs)
    {
        int limit;

        limit = Math.min (mIndex + LOOKAHEAD, mNodes.size ());
        for (int i = mIndex; i < limit; i++)
            if (theirs.equalsIgnoreCase (((Node)mNodes.elementAt (i)).getText ()))
                return (i);

        return (-1);
    }

    /**
     * Searches the look-ahead window for a tag node with the given name.
     * @param theirs The tag name reported by the Swing parser.
     * @param asStart If <code>true</code>, the lexer's tag name is compared as is.
     * @param asEnd If <code>true</code>, the lexer's tag name is also compared
     * with its first character dropped.
     * @return The index of the first matching node, or -1 if there is none.
     */
    private int findTag (String theirs, boolean asStart, boolean asEnd)
    {
        int limit;
        Object node;
        String ours;

        limit = Math.min (mIndex + LOOKAHEAD, mNodes.size ());
        for (int i = mIndex; i < limit; i++)
        {
            node = mNodes.elementAt (i);
            if (!(node instanceof TagNode))
                continue;
            ours = tagName ((TagNode)node);
            if (null == ours)
                continue;
            if (asStart && theirs.equalsIgnoreCase (ours))
                return (i);
            // guard the substring: an empty name has no first character to drop
            if (asEnd && (0 < ours.length ()) && theirs.equalsIgnoreCase (ours.substring (1)))
                return (i);
        }

        return (-1);
    }

    /**
     * Gets the name of a tag, which is the name of its first attribute.
     * @param tag The tag to examine.
     * @return The name, or <code>null</code> if the tag has no attributes.
     */
    private static String tagName (TagNode tag)
    {
        Vector attributes;

        attributes = tag.getAttributesEx ();
        if ((null == attributes) || attributes.isEmpty ())
            return (null);

        return (((Attribute)attributes.elementAt (0)).getName ());
    }

    /**
     * Moves the cursor according to the outcome of a search.
     * On a match the cursor is placed just after the matching node. On a
     * mismatch both sides are printed and the cursor advances by one node,
     * unless the lexer nodes are already exhausted.
     * @param match The index of the matching node, or -1 if there was none.
     * @param theirs The text or tag name reported by the Swing parser.
     */
    private void advance (int match, String theirs)
    {
        String ours;

        if (-1 != match)
        {
            mIndex = match + 1;
            return;
        }

        // the Swing parser may report more tokens than the lexer produced
        // nodes, so the cursor can legitimately be at the end of the list
        if (mIndex < mNodes.size ())
        {
            ours = ((Node)mNodes.elementAt (mIndex)).getText ();
            mIndex++;
        }
        else
            ours = EXHAUSTED;
        System.out.println ("theirs: " + theirs);
        System.out.println (" ours: " + ours);
    }

//    /**
//     * Get the document data from the URL.
//     * @param rd The reader to read bytes from.
//     * @return The parsed HTML document.
//     */
//    protected static Element[] getData (Reader rd) throws IOException
//    {
//        EditorKit kit;
//        Document doc;
//        Element[] ret;
//
//        ret = null;
//
//        // need this because HTMLEditorKit is not thread safe apparently
//        synchronized (Boolean.TRUE)
//        {
//            kit = new HTMLEditorKit ();
//            doc = kit.createDefaultDocument ();
//            // the Document class does not yet handle charset's properly
//            doc.putProperty ("IgnoreCharsetDirective", Boolean.TRUE);
//
//            try
//            {
//                // parse the HTML
//                kit.read (rd, doc, 0);
//            }
//            catch (BadLocationException ble)
//            {
//                throw new IOException ("parse error " + ble.getMessage ());
//            }
//
//            ret = doc.getRootElements ();
//        }
//
//        return (ret);
//    }

//    public static void scanElements (Element element) throws BadLocationException
//    {
//        int start;
//        int end;
//        String string;
//        ElementIterator it;
//        Element child;
//
//        if (element.isLeaf ())
//        {
//            start = element.getStartOffset ();
//            end = element.getEndOffset ();
//            string = element.getDocument ().getText (start, end - start);
//            System.out.println (string);
//        }
//        else
//            // iterate through the elements of the element
//            for (int i = 0; i < element.getElementCount (); i++)
//            {
//                child = element.getElement (i);
//                scanElements (child);
//            }
//    }

    /**
     * Editor kit whose only purpose is to expose <code>getParser()</code>
     * publicly by overriding it.
     */
    class MyKit extends HTMLEditorKit
    {
        public MyKit ()
        {
        }

        /**
         * Gets the parser of the underlying editor kit.
         * @return The parser returned by the superclass.
         */
        public HTMLEditorKit.Parser getParser ()
        {
            return (super.getParser ());
        }
    }

    /**
     * Creates an editor kit from which a parser can be obtained.
     * @return A new kit.
     */
    public MyKit getKit ()
    {
        return (new MyKit ());
    }

    /**
     * Lexes a fixed page, then runs the Swing parser over the same source with
     * a KitTest as callback so that differences are printed.
     * @param args the command line arguments (unused)
     * @exception ParserException If the lexer fails.
     * @exception IOException If the page cannot be fetched or read.
     */
    public static void main (String[] args) throws ParserException, IOException
    {
        Lexer lexer;
        Node node;
        Vector nodes;
        KitTest test;
        MyKit kit;
        Parser parser;

        // pass through it once to read the entire page
        URL url = new URL ("http://sourceforge.net/projects/htmlparser");
        lexer = new Lexer (url.openConnection ());
        nodes = new Vector ();
        while (null != (node = lexer.nextNode ()))
            nodes.addElement (node);

        // reset the reader
        lexer.getPage ().getSource ().reset ();
        test = new KitTest (nodes);
        kit = test.getKit ();
        parser = kit.getParser ();
        parser.parse ((Reader)lexer.getPage ().getSource (), (ParserCallback)test, true);
    }
}
--- NEW FILE: LexerTests.java ---
// HTMLParser Library v1_4_20030810 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// For any questions or suggestions, you can write to me at :
// Email :so...@in...
//
// Postal Address :
// Somik Raha
// Extreme Programmer & Coach
// Industrial Logic Corporation
// 2583 Cedar Street, Berkeley,
// CA 94708, USA
// Website : http://www.industriallogic.com
package org.htmlparser.tests.lexerTests;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import junit.framework.TestCase;
import org.htmlparser.Node;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.lexer.PageIndex;
import org.htmlparser.lexer.Stream;
import org.htmlparser.lexer.nodes.RemarkNode;
import org.htmlparser.lexer.nodes.StringNode;
import org.htmlparser.lexer.nodes.TagNode;
import org.htmlparser.util.ParserException;
/**
 * Test the Lexer class.
 * The cases feed small HTML fragments to a {@link Lexer} and check the first
 * node (or first few nodes) it returns.
 */
public class LexerTests extends TestCase
{
    /** Failure message for text node comparisons. */
    private static final String STRING_MESSAGE = "StringNode contents wrong";

    /** Failure message for tag and remark node comparisons. */
    private static final String TAG_MESSAGE = "Tag contents wrong";

    /**
     * Creates the named test case.
     * @param name The name of the test method to run.
     */
    public LexerTests (String name)
    {
        super (name);
    }

    /**
     * Lexes the input and checks the text of the first node, which must be
     * a string node.
     * @param input The text handed to the lexer.
     * @param expected The text the first node should contain.
     */
    private void checkFirstString (String input, String expected) throws ParserException
    {
        StringNode text = (StringNode)new Lexer (input).nextNode ();
        assertEquals (STRING_MESSAGE, expected, text.getText ());
    }

    /**
     * Fetches the next node, which must be a tag, and checks its HTML.
     * @param lexer The lexer to pull the node from.
     * @param expected The HTML the tag should render as.
     */
    private void checkNextTag (Lexer lexer, String expected) throws ParserException
    {
        TagNode tag = (TagNode)lexer.nextNode ();
        assertEquals (TAG_MESSAGE, expected, tag.toHtml ());
    }

    /**
     * Lexes the input and checks the HTML of the first node, which must be
     * a remark node.
     * @param input The text handed to the lexer.
     * @param expected The HTML the remark should render as.
     */
    private void checkFirstRemark (String input, String expected) throws ParserException
    {
        RemarkNode remark = (RemarkNode)new Lexer (input).nextNode ();
        assertEquals (TAG_MESSAGE, expected, remark.toHtml ());
    }

    /**
     * Plain text without any tags comes back as one string node.
     */
    public void testPureText () throws ParserException
    {
        checkFirstString ("Hello world", "Hello world");
    }

    /**
     * A Unix line ending inside text is kept in the string node.
     */
    public void testUnixEOL () throws ParserException
    {
        checkFirstString ("Hello\nworld", "Hello\nworld");
    }

    /**
     * Carriage return/line feed and lone carriage return inside text are
     * kept in the string node.
     */
    public void testDosEOL () throws ParserException
    {
        String[] inputs = { "Hello\r\nworld", "Hello\rworld" };

        for (int i = 0; i < inputs.length; i++)
            checkFirstString (inputs[i], inputs[i]);
    }

    /**
     * Line endings that are the last thing in the input are kept too.
     */
    public void testEOF_EOL () throws ParserException
    {
        String[] inputs = { "Hello world\n", "Hello world\r", "Hello world\r\n" };

        for (int i = 0; i < inputs.length; i++)
            checkFirstString (inputs[i], inputs[i]);
    }

    /**
     * A string node ends where a tag, end tag, JSP tag or comment begins.
     */
    public void testTagStops () throws ParserException
    {
        String[] texts =
        {
            "Hello world",
            "Hello world\n",
            "Hello world\r\n",
            "Hello world\r",
        };
        String[] followers =
        {
            "<head>",
            "</head>",
            "<%=head%>",
            "<!--head-->",
        };

        for (int t = 0; t < texts.length; t++)
            for (int f = 0; f < followers.length; f++)
                checkFirstString (texts[t] + followers[f], texts[t]);
    }

    /**
     * Input consisting only of tags yields tag nodes that render unchanged.
     */
    public void testPureTag () throws ParserException
    {
        String head = "<head>";
        String body = "<body>";
        Lexer single;
        Lexer pair;

        single = new Lexer (head);
        checkNextTag (single, head);

        pair = new Lexer (head + body);
        checkNextTag (pair, head);
        checkNextTag (pair, body);
    }

    /**
     * A tag with quoted, unquoted and multi-line attributes renders unchanged.
     */
    public void testAttributedTag () throws ParserException
    {
        String tag = "<head lang='en_US' dir=ltr\nprofile=\"http://htmlparser.sourceforge.org/dictionary.html\">";

        checkNextTag (new Lexer (tag), tag);
    }

    /**
     * Comments render unchanged, both alone and when followed by a tag.
     */
    public void testRemarkNode () throws ParserException
    {
        String[] remarks =
        {
            "<!-- This is a comment -->",
            "<!-- This is a comment -- >",
            "<!-- This is a\nmultiline comment -->",
        };
        // first pass: the remark on its own, second pass: followed by a tag
        String[] trailers = { "", "<head>" };

        for (int t = 0; t < trailers.length; t++)
            for (int r = 0; r < remarks.length; r++)
                checkFirstRemark (remarks[r] + trailers[t], remarks[r]);
    }

    /**
     * Try a real page, printing every node the lexer returns.
     */
    public void testReal () throws ParserException, IOException
    {
        URL page = new URL ("http://sourceforge.net/projects/htmlparser");
        Lexer lexer = new Lexer (page.openConnection ());

        for (Node current = lexer.nextNode (); null != current; current = lexer.nextNode ())
            System.out.println (current.toString ());
    }
}
Index: AllTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/AllTests.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** AllTests.java 11 Aug 2003 00:18:31 -0000 1.4
--- AllTests.java 17 Aug 2003 16:09:27 -0000 1.5
***************
*** 46,49 ****
--- 46,50 ----
suite.addTestSuite (PageTests.class);
suite.addTestSuite (PageIndexTests.class);
+ suite.addTestSuite (LexerTests.class);
return suite;
}
Index: PageIndexTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageIndexTests.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** PageIndexTests.java 11 Aug 2003 00:18:31 -0000 1.2
--- PageIndexTests.java 17 Aug 2003 16:09:27 -0000 1.3
***************
*** 123,131 ****
// test for correct position
if (0 <= pos - 1)
! assertTrue ("search error less " + pos + " " + index.elementAt (pos - 1) + " " + n, index.elementAt (pos - 1) < n);
if (pos + 1 < index.size ())
assertTrue ("search error greater " + pos + " " + index.elementAt (pos + 1) + " " + n, index.elementAt (pos + 1) > n);
-
- assertTrue ("wrong position", pos == index.add (n));
}
--- 123,129 ----
// test for correct position
if (0 <= pos - 1)
! assertTrue ("search error less " + pos + " " + index.elementAt (pos - 1) + " " + n, index.elementAt (pos - 1) <= n);
if (pos + 1 < index.size ())
assertTrue ("search error greater " + pos + " " + index.elementAt (pos + 1) + " " + n, index.elementAt (pos + 1) > n);
}
Index: PageTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageTests.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** PageTests.java 11 Aug 2003 00:18:31 -0000 1.4
--- PageTests.java 17 Aug 2003 16:09:27 -0000 1.5
***************
*** 66,70 ****
try
{
! page = new Page (null);
assertTrue ("null value in constructor", false);
}
--- 66,80 ----
try
{
! page = new Page ((URLConnection)null);
! assertTrue ("null value in constructor", false);
! }
! catch (IllegalArgumentException iae)
! {
! // expected outcome
! }
!
! try
! {
! page = new Page ((String)null);
assertTrue ("null value in constructor", false);
}
Index: SourceTests.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/SourceTests.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** SourceTests.java 11 Aug 2003 00:18:31 -0000 1.3
--- SourceTests.java 17 Aug 2003 16:09:27 -0000 1.4
***************
*** 106,110 ****
source = new Source (new Stream (new ByteArrayInputStream ("hello word".getBytes ())), null);
assertTrue ("no character", -1 != source.read ());
! source.close ();
try
{
--- 106,110 ----
source = new Source (new Stream (new ByteArrayInputStream ("hello word".getBytes ())), null);
assertTrue ("no character", -1 != source.read ());
! source.destroy ();
try
{
|