From: <vin...@us...> - 2012-01-08 03:09:26
|
Revision: 3591
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3591&view=rev
Author:   vinaygoel
Date:     2012-01-08 03:09:20 +0000 (Sun, 08 Jan 2012)
Log Message:
-----------
Fixed CSS bug with URL too short (String out of bounds exception). Added basic test case for CSS extraction

Modified Paths:
--------------
    trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java

Added Paths:
-----------
    trunk/archive-access/projects/archive-commons/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java

Property Changed:
----------------
    trunk/archive-access/projects/archive-commons/

Property changes on: trunk/archive-access/projects/archive-commons
___________________________________________________________________
Added: svn:ignore
   + target

Modified: trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java
===================================================================
--- trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java	2011-12-24 02:33:44 UTC (rev 3590)
+++ trunk/archive-access/projects/archive-commons/src/main/java/org/archive/resource/html/ExtractingParseObserver.java	2012-01-08 03:09:20 UTC (rev 3591)
@@ -378,6 +378,9 @@
 				int urlStart = m.start(1);
 				int urlEnd = m.end(1);
 				idx = urlEnd;
+				if(url.length() < 2) {
+					continue;
+				}
 				if ((url.charAt(0) == '(')
 						&& (url.charAt(origUrlLength-1) == ')')) {
 					url = url.substring(1, origUrlLength - 1);

Added: trunk/archive-access/projects/archive-commons/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java
===================================================================
--- trunk/archive-access/projects/archive-commons/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java	(rev 0)
+++ trunk/archive-access/projects/archive-commons/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java	2012-01-08 03:09:20 UTC (rev 3591)
@@ -0,0 +1,98 @@
+package org.archive.resource.html;
+
+import org.archive.resource.MetaData;
+import org.htmlparser.nodes.TextNode;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import junit.framework.TestCase;
+
+public class ExtractingParseObserverTest extends TestCase {
+
+	public void testHandleStyleNodeExceptions() throws Exception {
+		String[] tests = {
+				"some css",
+				"url()",
+				"url () ",
+				"url ('')",
+				"url (' ')",
+				"url('\")",
+				"url(')",
+				"url('\"')"
+		};
+		boolean except = false;
+		HTMLMetaData md = new HTMLMetaData(new MetaData());
+		ExtractingParseObserver epo = new ExtractingParseObserver(md);
+		for(String css : tests) {
+			try {
+				TextNode tn = new TextNode(css);
+				epo.handleStyleNode(tn);
+			} catch(Exception e) {
+				System.err.format("And the winner is....(%s)\n", css);
+				e.printStackTrace();
+				except = true;
+				throw e;
+			}
+			assertFalse(except);
+		}
+	}
+	public void testHandleStyleNode() throws Exception {
+		String[][] tests = {
+				{""},
+				{"url(foo.gif)","foo.gif"},
+				{"url('foo.gif')","foo.gif"},
+				{"url(\"foo.gif\")","foo.gif"},
+				{"url(\\\"foo.gif\\\")","foo.gif"},
+				{"url(\\'foo.gif\\')","foo.gif"},
+
+		};
+		for(String[] testa : tests) {
+			checkExtract(testa);
+		}
+		//		boolean except = false;
+//		HTMLMetaData md = new HTMLMetaData(new MetaData());
+//		ExtractingParseObserver epo = new ExtractingParseObserver(md);
+//		for(String css : tests) {
+//			try {
+//				TextNode tn = new TextNode(css);
+//				epo.handleStyleNode(tn);
+//			} catch(Exception e) {
+//				System.err.format("And the winner is....(%s)\n", css);
+//				e.printStackTrace();
+//				except = true;
+//				throw e;
+//			}
+//			assertFalse(except);
+//		}
+	}
+	private void checkExtract(String[] data) throws JSONException {
+//		System.err.format("CSS(%s) want[0](%s)\n",css,want[0]);
+		String css = data[0];
+		boolean except = false;
+		HTMLMetaData md = new HTMLMetaData(new MetaData());
+		ExtractingParseObserver epo = new ExtractingParseObserver(md);
+		try {
+			TextNode tn = new TextNode(css);
+			epo.handleStyleNode(tn);
+		} catch(Exception e) {
+			fail("Exception with CSS:" + css);
+		}
+		JSONArray a = md.optJSONArray("Links");
+		if(data.length > 1) {
+			assertNotNull(a);
+			assertEquals(data.length-1,a.length());
+			for(int i = 1; i < data.length; i++) {
+				Object o = a.optJSONObject(i-1);
+
+				assertTrue(o instanceof JSONObject);
+				JSONObject jo = (JSONObject) o;
+				assertEquals(data[i],jo.getString("href"));
+			}
+		} else {
+			assertNull(a);
+		}
+	}
+
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|