[Htmlparser-cvs] htmlparser/src/org/htmlparser/tests/filterTests FilterTest.java,1.5,1.6
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-05-24 19:36:34
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/filterTests
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv582/tests/filterTests

Modified Files:
    FilterTest.java
Log Message:
Add regular expression filter.

Index: FilterTest.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/filterTests/FilterTest.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** FilterTest.java 24 May 2004 16:18:31 -0000 1.5
--- FilterTest.java 24 May 2004 19:36:23 -0000 1.6
***************
*** 35,38 ****
--- 35,39 ----
  import org.htmlparser.filters.NotFilter;
  import org.htmlparser.filters.OrFilter;
+ import org.htmlparser.filters.RegexFilter;
  import org.htmlparser.filters.StringFilter;
  import org.htmlparser.filters.TagNameFilter;
***************
*** 271,274 ****
--- 272,333 ----
          assertEquals ("wrong count", 2, count);
      }
+
+     /**
+      * Test regular expression matching:
+      */
+     public void testRegularExpression () throws Exception
+     {
+         String target =
+             "\n" +
+             "\n" +
+             "Most recently, in the Western Conference final, the Flames knocked off \n" +
+             "the San Jose Sharks, the Pacific Division champions, to become the first \n" +
+             "Canadian team to reach the Stanley Cup Championship series since 1994.";
+
+         String html =
+             "<html><head><title>CBC Sports Online: NHL Playoffs</title></head>" +
+             "<body><h1>CBC SPORTS ONLINE</h1>\n" +
+             "The Calgary Flames have already defeated three NHL division winners \n" +
+             "during their improbable playoff run. If they are to hoist the Stanley \n" +
+             "Cup they'll have to go through one more. <p><table ALIGN=\"Right\" width=196 CELLPADDING=0 cellspacing=0 hspace=4> <tr><td><img src=\"/gfx/topstory/sports/iginla_j0524.jpg\" width=194 height=194 hspace=3 border=1><br>\n" +
+             "\n" +
+             "<font SIZE=\"1\" FACE=\"verdana,arial\">\n" +
+             "Jarome Iginla skates during the Flames' practice on Monday. Calgary takes on the Tampa Bay Lightning in the Stanley Cup finals beginning Tuesday night in Tampa\n" +
+             "</font></td></tr></table>\n" +
+             "\n" +
+             "\n" +
+             "In the post-season's first round, the Flames defeated the Vancouver \n" +
+             "Canucks, the Northwest Division winners, in seven tough games. <p>\n" +
+             "\n" +
+             "In Round 2 it was the Detroit Red Wings, who not only won the Central \n" +
+             "Division, but also boasted the NHL's best overall record during the \n" +
+             "regular season, who fell to the Flames. <p>" +
+             target +
+             "<p>\n" +
+             "\n" +
+             "Up next for the Flames is the Tampa Bay Lighting -- the runaway winners \n" +
+             "of the NHL's Southeast Division and the Eastern Conference's best team \n" +
+             "during the regular season. <p>\n" +
+             "\n" +
+             "The Lighting advanced by beating the Philadelphia Flyers in the Eastern \n" +
+             "Conference final. <p>\n" +
+             "</body></html>\n";
+         Lexer lexer;
+         Parser parser;
+         RegexFilter filter;
+         NodeIterator iterator;
+         int count;
+
+         lexer = new Lexer (html);
+         parser = new Parser (lexer);
+         filter = new RegexFilter ("(19|20)\\d\\d([- \\\\/.](0[1-9]|1[012])[- \\\\/.](0[1-9]|[12][0-9]|3[01]))?");
+         count = 0;
+         for (iterator = parser.extractAllNodesThatMatch (filter).elements (); iterator.hasMoreNodes ();)
+         {
+             assertEquals ("text wrong", target, iterator.nextNode ().toHtml ());
+             count++;
+         }
+         assertEquals ("wrong count", 1, count);
+     }
  }

|