From: <bra...@us...> - 2007-12-15 02:00:47
|
Revision: 2115
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2115&view=rev
Author:   bradtofel
Date:     2007-12-14 18:00:51 -0800 (Fri, 14 Dec 2007)

Log Message:
-----------
BUGFIX: (unreported) regex was not finding simple tags (ex: "<head>")...

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java
    trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2007-12-12 03:34:13 UTC (rev 2114)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2007-12-15 02:00:51 UTC (rev 2115)
@@ -100,7 +100,7 @@
         Pattern pc = wholeTagPatterns.get(tagName);
         if (pc == null) {
-            String tagPatString = "<\\s*" + tagName + "\\s+[^>]+>";
+            String tagPatString = "<\\s*" + tagName + "((>)|(\\s+[^>]*>))";
             pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE);
             wholeTagPatterns.put(tagName, pc);

Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2007-12-12 03:34:13 UTC (rev 2114)
+++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2007-12-15 02:00:51 UTC (rev 2115)
@@ -98,7 +98,24 @@
                 "author2","Bakri Abubakr http://bayanit.com/");
     }
+    public void testFindEndOfFirst() {
+        findEndOf("<head>","head",6);
+        findEndOf("<html><head><body>","head",12);
+        findEndOf("<html><head goo=bar><body>","head",20);
+        findEndOf("<html><head goo=bar><body>full","body",26);
+        findEndOf("<html><head goo=bar><body >full","body",27);
+        findEndOf("<html><head goo=bar><body >full","body",27);
+        findEndOf("<html><head goo=bar><body yar=bam>full","body",34);
+        findEndOf("<html><head goo=bar><body yar='bam'>full","body",36);
+        findEndOf("<html><head goo=bar><body yar=\"bam\">full","body",36);
+    }
+    public void findEndOf(String page, String tag, int offset) {
+        StringBuilder sb = new StringBuilder(page);
+        int found = TagMagix.getEndOfFirstTag(sb,tag);
+        assertEquals("FAILED find end of " +tag+ " in ("+page+")",offset,found);
+    }
+
     /**
      * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)'
      */

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|