Thread: [Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/temporaryFailures AttributeParserTest.java,NONE
Brought to you by:
derrickoswald
From: <so...@us...> - 2003-06-17 01:52:28
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/temporaryFailures In directory sc8-pr-cvs1:/tmp/cvs-serv7764/src/org/htmlparser/tests/temporaryFailures Added Files: AttributeParserTest.java TagParserTest.java Log Message: added temporaryFailures package --- NEW FILE: AttributeParserTest.java --- // HTMLParser Library v1_4_20030601 - A java-based parser for HTML // Copyright (C) Dec 31, 2000 Somik Raha // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // For any questions or suggestions, you can write to me at : // Email :so...@in... // // Postal Address : // Somik Raha // Extreme Programmer & Coach // Industrial Logic Corporation // 2583 Cedar Street, Berkeley, // CA 94708, USA // Website : http://www.industriallogic.com // ---- IMPORTANT: This class has failing tests ---- // Original Location: org.htmlparser.tests.parserHelperTests; // Please remember to add test back to org.htmlparser.tests.parserHelperTests.AllTests.suite() // and delete these comments when you're done. 
// ---- NEEDS FIXING ----

package org.htmlparser.tests.temporaryFailures;

import java.util.Hashtable;

import org.htmlparser.Parser;
import org.htmlparser.parserHelper.AttributeParser;
import org.htmlparser.tags.Tag;
import org.htmlparser.tags.data.TagData;
import org.htmlparser.tests.ParserTestCase;

/**
 * Tests for {@link AttributeParser}.
 * Each test wraps raw tag contents in a {@link Tag}, parses its attributes
 * into a table and checks individual entries. The tests look attributes up
 * by upper-case name and the tag name under {@link Tag#TAGNAME}.
 */
public class AttributeParserTest extends ParserTestCase {
    /** Parser under test; a fresh instance is created in {@link #setUp()}. */
    private AttributeParser parser;
    /** Tag built from the most recent call to {@link #getParameterTableFor(String)}. */
    private Tag tag;
    /** Attribute table produced by the most recent call to {@link #getParameterTableFor(String)}. */
    private Hashtable table;

    public AttributeParserTest(String name) {
        super(name);
    }

    /** Creates the attribute parser used by every test. */
    protected void setUp() {
        parser = new AttributeParser();
    }

    /**
     * Builds a tag from the given contents and parses its attributes,
     * storing the tag in {@link #tag} and the result in {@link #table}.
     *
     * @param tagContents the text of the tag, without the enclosing angle brackets
     */
    public void getParameterTableFor(String tagContents) {
        tag = new Tag(new TagData(0,0,tagContents,""));
        table = parser.parseAttributes(tag);
    }

    public void testParseParameters() {
        getParameterTableFor("a b = \"c\"");
        assertEquals("Value","c",table.get("B"));
    }

    public void testParseTokenValues() {
        getParameterTableFor("a b = \"'\"");
        assertEquals("Value","'",table.get("B"));
    }

    public void testParseEmptyValues() {
        getParameterTableFor("a b = \"\"");
        assertEquals("Value","",table.get("B"));
    }

    public void testParseMissingEqual() {
        getParameterTableFor("a b\"c\"");
        assertEquals("ValueB","",table.get("B"));
    }

    public void testTwoParams(){
        getParameterTableFor("PARAM NAME=\"Param1\" VALUE=\"Somik\">\n");
        assertEquals("Param1","Param1",table.get("NAME"));
        assertEquals("Somik","Somik",table.get("VALUE"));
    }

    public void testPlainParams(){
        getParameterTableFor("PARAM NAME=Param1 VALUE=Somik");
        assertEquals("Param1","Param1",table.get("NAME"));
        assertEquals("Somik","Somik",table.get("VALUE"));
    }

    public void testValueMissing() {
        getParameterTableFor("INPUT type=\"checkbox\" name=\"Authorize\" value=\"Y\" checked");
        assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME));
        assertEquals("Type","checkbox",table.get("TYPE"));
        assertEquals("Name","Authorize",table.get("NAME"));
        assertEquals("Value","Y",table.get("VALUE"));
        assertEquals("Checked","",table.get("CHECKED"));
    }

    /**
     * This is a simulation of a bug reported by Dhaval Udani - wherein
     * a space before the end of the tag causes a problem - there is a key
     * in the table with just a space in it and an empty value
     */
    public void testIncorrectSpaceKeyBug() {
        getParameterTableFor("TEXTAREA name=\"Remarks\" ");
        // There should only be two keys..
        assertEquals("There should only be two keys",2,table.size());
        // The first key is name
        String key1 = "NAME";
        String value1 = (String)table.get(key1);
        assertEquals("Expected value 1", "Remarks",value1);
        String key2 = Tag.TAGNAME;
        assertEquals("Expected Value 2","TEXTAREA",table.get(key2));
    }

    public void testNullTag(){
        getParameterTableFor("INPUT type=");
        assertEquals("Name of Tag","INPUT",table.get(Tag.TAGNAME));
        assertEquals("Type","",table.get("TYPE"));
    }

    public void testAttributeWithSpuriousEqualTo() {
        getParameterTableFor(
            "a class=rlbA href=/news/866201.asp?0sl=-32"
        );
        assertStringEquals(
            "href",
            "/news/866201.asp?0sl=-32",
            (String)table.get("HREF")
        );
    }

    public void testQuestionMarksInAttributes() {
        getParameterTableFor(
            "a href=\"mailto:sa...@ne...?subject=Site Comments\""
        );
        assertStringEquals(
            "href",
            "mailto:sa...@ne...?subject=Site Comments",
            (String)table.get("HREF")
        );
        assertStringEquals(
            "tag name",
            "A",
            (String)table.get(Tag.TAGNAME)
        );
    }

    /**
     * Believe it or not Moi (vincent_aumont) wants htmlparser to parse a text file
     * containing something that looks nearly like a tag:
     * <pre>
     * "                        basic_string<char, string_char_traits<char>, <>>::basic_string()"
     * </pre>
     * This was throwing a null pointer exception when the empty <> was encountered.
     * Bug #725420 NPE in StringBean.visitTag
     **/
    public void testEmptyTag () {
        getParameterTableFor("");
        assertNotNull ("No Tag.TAGNAME",table.get(Tag.TAGNAME));
    }

    /**
     * Test attributes when they contain scriptlets.
     * Submitted by Cory Seefurth
     * See also feature request #725376 Handle script in attributes.
     * Only perform this test if it's version 1.4 or higher.
     */
    public void testJspWithinAttributes() {
        // The version check is static, so no Parser instance is needed here;
        // the attribute parser under test is the 'parser' field.
        if (1.4 <= Parser.getVersionNumber ()) {
            getParameterTableFor(
                "a href=\"<%=Application(\"sURL\")%>/literature/index.htm"
            );
            assertStringEquals(
                "href",
                "<%=Application(\"sURL\")%>/literature/index.htm",
                (String)table.get("HREF")
            );
        }
    }

    /**
     * Test Script in attributes.
     * See feature request #725376 Handle script in attributes.
     * Only perform this test if it's version 1.4 or higher.
     */
    public void testScriptedTag () {
        // The version check is static, so no Parser instance is needed here;
        // the attribute parser under test is the 'parser' field.
        if (1.4 <= Parser.getVersionNumber ()) {
            getParameterTableFor("body onLoad=defaultStatus=''");
            String name = (String)table.get(Tag.TAGNAME);
            assertNotNull ("No Tag.TAGNAME", name);
            assertStringEquals("tag name parsed incorrectly", "BODY", name);
            String value = (String)table.get ("ONLOAD");
            assertStringEquals ("parameter parsed incorrectly", "defaultStatus=''", value);
        }
    }
}

--- NEW FILE: TagParserTest.java ---
// HTMLParser Library v1_4_20030601 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// For any questions or suggestions, you can write to me at :
// Email :so...@in...
//
// Postal Address :
// Somik Raha
// Extreme Programmer & Coach
// Industrial Logic, Inc.
// 2583 Cedar Street, Berkeley,
// CA 94708, USA
// Website : http://www.industriallogic.com

// ---- IMPORTANT: This class has failing tests ----
// Original Location: org.htmlparser.tests.parserHelperTests;
// Please remember to add test back to org.htmlparser.tests.parserHelperTests.AllTests.suite()
// and delete these comments when you're done.
// ---- NEEDS FIXING ----

package org.htmlparser.tests.temporaryFailures;

import java.util.HashMap;
import java.util.Map;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;

/**
 * Tag parsing tests: quoting, angle brackets inside attribute values,
 * multi-line attributes, and concurrent use of several parsers.
 */
public class TagParserTest extends ParserTestCase {
    /** Page fragment appended to the per-thread HTML in {@link #testThreadSafety()}; it contains one link. */
    private static final String TEST_HTML =
        "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" +
        "<!-- Server: sf-web2 -->" +
        "<html lang=\"en\">" +
        " <head><link rel=\"stylesheet\" type=\"text/css\" href=\"http://sourceforge.net/cssdef.php\">" +
        " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">" +
        " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>" +
        " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">" +
        " <!--" +
        " function help_window(helpurl) {" +
        " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');" +
        " }" +
        " // -->" +
        " </SCRIPT>" +
        " <link rel=\"SHORTCUT ICON\" href=\"/images/favicon.ico\">" +
        "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->" +
        "<script language=\"JavaScript\" type=\"text/javascript\">" +
        "<!--" +
        " function jump(targ,selObj,restore){ //v3.0" +
        " if (selObj.options[selObj.selectedIndex].value) " +
        " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");" +
        " if (restore) selObj.selectedIndex=0;" +
        " }" +
        " //-->" +
        "</script>" +
        "<a href=\"http://normallink.com/sometext.html\">" +
        "<style type=\"text/css\">" +
        "<!--" +
        "A:link { text-decoration:none }" +
        "A:visited { text-decoration:none }" +
        "A:active { text-decoration:none }" +
        "A:hover { text-decoration:underline; color:#0066FF; }" +
        "-->" +
        "</style>" +
        "</head>" +
        "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">";

    /** Link that {@link #testThreadSafety()} prepends to the page for the first half of the threads. */
    private static final String REPORT_LINK =
        "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html";

    /** Link contained in {@link #TEST_HTML}, also prepended for the second half of the threads. */
    private static final String NORMAL_LINK = "http://normallink.com/sometext.html";

    /** Assigned in {@link #testThreadSafety()}; not read anywhere in this class. */
    private Map results;

    /**
     * Sum of the ids of the worker threads that have finished.
     * Only updated while holding the lock on this test instance.
     */
    private int testProgress;

    public TagParserTest(String name) {
        super(name);
    }

    public void testTagWithQuotes() throws Exception {
        String testHtml =
            "<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">";
        createParser(testHtml);
        parseAndAssertNodeCount(1);
        assertType("should be Tag",Tag.class,node[0]);
        Tag tag = (Tag)node[0];
        assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT"));
        assertStringEquals(
            "html",
            "<IMG BORDER=\"0\" ALT=\"Marshall Field's\" WIDTH=\"87\" SRC=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" HEIGHT=\"20\">",
            tag.toHtml()
        );
    }

    public void testEmptyTag() throws Exception {
        createParser("<custom/>");
        parseAndAssertNodeCount(1);
        assertType("should be Tag",Tag.class,node[0]);
        Tag tag = (Tag)node[0];
        assertStringEquals("tag name","CUSTOM",tag.getTagName());
        assertTrue("empty tag",tag.isEmptyXmlTag());
        assertStringEquals(
            "html",
            "<CUSTOM/>",
            tag.toHtml()
        );
    }

    public void testTagWithCloseTagSymbolInAttribute() throws ParserException {
        createParser("<tag att=\"a>b\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag",Tag.class,node[0]);
        Tag tag = (Tag)node[0];
        assertStringEquals("attribute","a>b",tag.getAttribute("att"));
    }

    public void testTagWithOpenTagSymbolInAttribute() throws ParserException {
        createParser("<tag att=\"a<b\">");
        parseAndAssertNodeCount(1);
        assertType("should be Tag",Tag.class,node[0]);
        Tag tag = (Tag)node[0];
        assertStringEquals("attribute","a<b",tag.getAttribute("att"));
    }

    public void testTagWithSingleQuote() throws ParserException {
        createParser("<tag att=\'a<b\'>");
        parseAndAssertNodeCount(1);
        assertType("should be Tag",Tag.class,node[0]);
        Tag tag = (Tag)node[0];
        assertStringEquals("html","<TAG ATT=\"a<b\">",tag.toHtml());
        assertStringEquals("attribute","a<b",tag.getAttribute("att"));
    }

    /**
     * The following multi line test cases are from
     * bug #725749 Parser does not handle < and > in multi-line attributes
     * submitted by Joe Robins (zorblak)
     */

    public void testMultiLine1 () throws ParserException {
        createParser("<meta name=\"foo\" content=\"foo<bar>\">");
        parseAndAssertNodeCount (1);
        assertType ("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag)node[0];
        String html = tag.toHtml ();
        assertStringEquals ("html","<META CONTENT=\"foo<bar>\" NAME=\"foo\">", html);
        String attribute1 = tag.getAttribute ("NAME");
        assertStringEquals ("attribute 1","foo", attribute1);
        String attribute2 = tag.getAttribute ("CONTENT");
        assertStringEquals ("attribute 2","foo<bar>", attribute2);
    }

    public void testMultiLine2 () throws ParserException {
        createParser("<meta name=\"foo\" content=\"foo<bar\">");
        parseAndAssertNodeCount (1);
        assertType ("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag)node[0];
        String html = tag.toHtml ();
        assertStringEquals ("html","<META CONTENT=\"foo<bar\" NAME=\"foo\">", html);
        String attribute1 = tag.getAttribute ("NAME");
        assertStringEquals ("attribute 1","foo", attribute1);
        String attribute2 = tag.getAttribute ("CONTENT");
        assertStringEquals ("attribute 2","foo<bar", attribute2);
    }

    public void testMultiLine3 () throws ParserException {
        createParser("<meta name=\"foo\" content=\"foobar>\">");
        parseAndAssertNodeCount (1);
        assertType ("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag)node[0];
        String html = tag.toHtml ();
        assertStringEquals ("html","<META CONTENT=\"foobar>\" NAME=\"foo\">", html);
        String attribute1 = tag.getAttribute ("NAME");
        assertStringEquals ("attribute 1","foo", attribute1);
        String attribute2 = tag.getAttribute ("CONTENT");
        assertStringEquals ("attribute 2","foobar>", attribute2);
    }

    public void testMultiLine4 () throws ParserException {
        createParser("<meta name=\"foo\" content=\"foo\nbar>\">");
        parseAndAssertNodeCount (1);
        assertType ("should be Tag", Tag.class, node[0]);
        Tag tag = (Tag)node[0];
        String html = tag.toHtml ();
        assertStringEquals ("html","<META CONTENT=\"foo\r\nbar>\" NAME=\"foo\">", html);
        String attribute1 = tag.getAttribute ("NAME");
        assertStringEquals ("attribute 1","foo", attribute1);
        String attribute2 = tag.getAttribute ("CONTENT");
        assertStringEquals ("attribute 2","foo\r\nbar>", attribute2);
    }

    /**
     * Test multiline tag like attribute.
     * See feature request #725749 Handle < and > in multi-line attributes.
     * Only perform this test if it's version 1.4 or higher.
     */
    public void testMultiLine5 () throws ParserException {
        // <meta name="foo" content="<foo>
        // bar">
        createParser("<meta name=\"foo\" content=\"<foo>\nbar\">");
        if (1.4 <= Parser.getVersionNumber ()) {
            parseAndAssertNodeCount (1);
            assertType ("should be Tag", Tag.class, node[0]);
            Tag tag = (Tag)node[0];
            String html = tag.toHtml ();
            assertStringEquals ("html","<META CONTENT=\"<foo>\r\nbar\" NAME=\"foo\">", html);
            String attribute1 = tag.getAttribute ("NAME");
            assertStringEquals ("attribute 1","foo", attribute1);
            String attribute2 = tag.getAttribute ("CONTENT");
            assertStringEquals ("attribute 2","<foo>\r\nbar", attribute2);
        }
    }

    /**
     * Test multiline broken tag like attribute.
     * See feature request #725749 Handle < and > in multi-line attributes.
     * Only perform this test if it's version 1.4 or higher.
     */
    public void testMultiLine6 () throws ParserException {
        // <meta name="foo" content="foo>
        // bar">
        createParser("<meta name=\"foo\" content=\"foo>\nbar\">");
        if (1.4 <= Parser.getVersionNumber ()) {
            parseAndAssertNodeCount (1);
            assertType ("should be Tag", Tag.class, node[0]);
            Tag tag = (Tag)node[0];
            String html = tag.toHtml ();
            assertStringEquals ("html","<META CONTENT=\"foo>\r\nbar\" NAME=\"foo\">", html);
            String attribute1 = tag.getAttribute ("NAME");
            assertStringEquals ("attribute 1","foo", attribute1);
            String attribute2 = tag.getAttribute ("CONTENT");
            assertStringEquals ("attribute 2","foo>\r\nbar", attribute2);
        }
    }

    /**
     * Test multiline split tag like attribute.
     * See feature request #725749 Handle < and > in multi-line attributes.
     * Only perform this test if it's version 1.4 or higher.
     */
    public void testMultiLine7 () throws ParserException {
        // <meta name="foo" content="<foo
        // bar">
        createParser("<meta name=\"foo\" content=\"<foo\nbar\"");
        if (1.4 <= Parser.getVersionNumber ()) {
            parseAndAssertNodeCount (1);
            assertType ("should be Tag", Tag.class, node[0]);
            Tag tag = (Tag)node[0];
            String html = tag.toHtml ();
            assertStringEquals ("html","<META CONTENT=\"<foo\r\nbar\" NAME=\"foo\">", html);
            String attribute1 = tag.getAttribute ("NAME");
            assertStringEquals ("attribute 1","foo", attribute1);
            String attribute2 = tag.getAttribute ("CONTENT");
            assertStringEquals ("attribute 2","<foo\r\nbar", attribute2);
        }
    }

    /**
     * End of multi line test cases.
     */

    /**
     * Test multiple threads running against the parser.
     * See feature request #736144 Handle multi-threaded operation.
     * Only perform this test if it's version 1.4 or higher.
     * <p>
     * Starts 100 workers, each with its own parser. The first half parse a
     * page whose links are the report link followed by the normal link; the
     * second half parse a page containing the normal link twice. The test
     * waits for every worker with {@link Thread#join()} and then reports the
     * first worker whose links were wrong.
     *
     * @throws Exception if the test thread is interrupted while waiting for the workers
     */
    public void testThreadSafety() throws Exception {
        createParser("<html></html>");
        if (1.4 <= Parser.getVersionNumber ()) {
            String testHtml1 = "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>" +
                TEST_HTML;

            String testHtml2 = "<a href=\"http://normallink.com/sometext.html\">" +
                TEST_HTML;
            ParsingThread parsingThread [] = new ParsingThread[100];
            Thread worker [] = new Thread[parsingThread.length];
            results = new HashMap();
            testProgress = 0;
            for (int i=0;i<parsingThread.length;i++) {
                if (i<parsingThread.length/2)
                    parsingThread[i] = new ParsingThread(i,testHtml1,parsingThread.length);
                else
                    parsingThread[i] = new ParsingThread(i,testHtml2,parsingThread.length);

                worker[i] = new Thread(parsingThread[i]);
                worker[i].start();
            }

            // Wait for every worker to terminate. Polling the id sum is not
            // enough: worker 0 contributes nothing to it, and an unsynchronized
            // update could be lost and leave the poll spinning forever.
            for (int i=0;i<worker.length;i++) {
                worker[i].join();
            }
            assertEquals(
                "every worker should have reported completion",
                computeCompletionValue(parsingThread.length),
                testProgress
            );

            for (int i=0;i<parsingThread.length;i++) {
                if (!parsingThread[i].passed()) {
                    assertNotNull("Thread "+i+" link 1",parsingThread[i].getLink1());
                    assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2());
                    if (i<parsingThread.length/2) {
                        assertStringEquals(
                            "Thread "+i+", link 1:",
                            REPORT_LINK,
                            parsingThread[i].getLink1().getLink()
                        );
                        assertStringEquals(
                            "Thread "+i+", link 2:",
                            NORMAL_LINK,
                            parsingThread[i].getLink2().getLink()
                        );
                    } else {
                        // The second-half page contains the normal link twice,
                        // matching the check in ParsingThread.run().
                        assertStringEquals(
                            "Thread "+i+", link 1:",
                            NORMAL_LINK,
                            parsingThread[i].getLink1().getLink()
                        );
                        assertStringEquals(
                            "Thread "+i+", link 2:",
                            NORMAL_LINK,
                            parsingThread[i].getLink2().getLink()
                        );
                    }
                    // A worker that did not pass must fail the test even if
                    // the diagnostics above found nothing to report.
                    fail("Thread "+i+" did not pass");
                }
            }
        }
    }

    /**
     * Returns the sum of the worker ids 0 .. numThreads-1, i.e. the value
     * {@link #testProgress} reaches once every worker has finished.
     */
    private int computeCompletionValue(int numThreads) {
        return numThreads * (numThreads - 1) / 2;
    }

    /**
     * Worker that extracts the link tags from one page with its own parser
     * and records whether the first two links are the expected ones.
     */
    class ParsingThread implements Runnable {
        Parser parser;
        int id;
        LinkTag link1, link2;
        boolean result;
        int max;

        /**
         * @param id index of this worker; ids below max/2 expect the report link first
         * @param testHtml the page this worker parses
         * @param max total number of workers
         */
        ParsingThread(int id, String testHtml, int max) {
            this.id = id;
            this.max = max;
            this.parser = Parser.createParser(testHtml);
            parser.registerScanners();
        }

        public void run() {
            try {
                result = false;
                Node linkTag [] = parser.extractAllNodesThatAre(LinkTag.class);
                link1 = (LinkTag)linkTag[0];
                link2 = (LinkTag)linkTag[1];
                if (id<max/2) {
                    if (link1.getLink().equals(REPORT_LINK) &&
                        link2.getLink().equals(NORMAL_LINK))
                        result = true;
                } else {
                    if (link1.getLink().equals(NORMAL_LINK) &&
                        link2.getLink().equals(NORMAL_LINK))
                        result = true;
                }
            }
            catch (ParserException e) {
                System.err.println("Parser Exception");
                e.printStackTrace();
            }
            finally {
                // += is a read-modify-write; guard it so concurrent workers
                // cannot lose each other's updates.
                synchronized (TagParserTest.this) {
                    testProgress += id;
                }
            }
        }

        public LinkTag getLink1() {
            return link1;
        }

        public LinkTag getLink2() {
            return link2;
        }

        public boolean passed() {
            return result;
        }
    }
}
|