You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
Revision: 3030 http://archive-access.svn.sourceforge.net/archive-access/?rev=3030&view=rev Author: bradtofel Date: 2010-04-09 02:01:13 +0000 (Fri, 09 Apr 2010) Log Message: ----------- TWEAK: removed unused imports Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2010-04-05 23:43:16 UTC (rev 3029) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2010-04-09 02:01:13 UTC (rev 3030) @@ -27,16 +27,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URIException; -import org.archive.net.UURIFactory; -import org.archive.wayback.ResultURIConverter; import org.archive.wayback.archivalurl.ArchivalUrlRequestParser; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BetterRequestException; import org.archive.wayback.requestparser.BaseRequestParser; import org.archive.wayback.requestparser.PathRequestParser; import org.archive.wayback.util.Timestamp; -import org.archive.wayback.util.url.UrlOperations; import org.archive.wayback.webapp.AccessPoint; /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-05 23:43:22
|
Revision: 3029 http://archive-access.svn.sourceforge.net/archive-access/?rev=3029&view=rev Author: bradtofel Date: 2010-04-05 23:43:16 +0000 (Mon, 05 Apr 2010) Log Message: ----------- FEATURE: added configuration of a liveWebPrefix. If configured, a NotInArchiveException is redirected to: PREFIX+MISSING_URL which is assumed to be handled by another AccessPoint that can replay content from the live web. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-05 23:36:47 UTC (rev 3028) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-05 23:43:16 UTC (rev 3029) @@ -82,6 +82,8 @@ private static final Logger LOGGER = Logger.getLogger( AccessPoint.class.getName()); + private String liveWebPrefix = null; + private boolean useServerName = false; private boolean useAnchorWindow = false; private boolean exactSchemeMatch = true; @@ -243,7 +245,7 @@ * @return the portion of the request following the path to this context * without leading '/' */ - private String translateRequest(HttpServletRequest httpRequest, + protected String translateRequest(HttpServletRequest httpRequest, boolean includeQuery) { String origRequestPath = httpRequest.getRequestURI(); @@ -334,7 +336,7 @@ return getAbsoluteContextPrefix(httpRequest, useServerName); } - private boolean dispatchLocal(HttpServletRequest httpRequest, + protected boolean dispatchLocal(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { @@ -418,11 +420,20 @@ handled = true; } catch(WaybackException e) { - 
logNotInArchive(e,wbRequest); - exception.renderException(httpRequest, httpResponse, wbRequest, e, - uriConverter); + boolean drawError = true; + if(e instanceof ResourceNotInArchiveException) { + if(liveWebPrefix != null) { + String liveUrl = liveWebPrefix + wbRequest.getRequestUrl(); + httpResponse.sendRedirect(liveUrl); + drawError = false; + } + } + if(drawError) { + logNotInArchive(e,wbRequest); + exception.renderException(httpRequest, httpResponse, wbRequest, e, + uriConverter); + } } - return handled; } @@ -706,4 +717,18 @@ public void setExactHostMatch(boolean exactHostMatch) { this.exactHostMatch = exactHostMatch; } + + /** + * @return the liveWebPrefix + */ + public String getLiveWebPrefix() { + return liveWebPrefix; + } + + /** + * @param liveWebPrefix the liveWebPrefix to set + */ + public void setLiveWebPrefix(String liveWebPrefix) { + this.liveWebPrefix = liveWebPrefix; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-05 23:36:54
|
Revision: 3028 http://archive-access.svn.sourceforge.net/archive-access/?rev=3028&view=rev Author: bradtofel Date: 2010-04-05 23:36:47 +0000 (Mon, 05 Apr 2010) Log Message: ----------- BUGFIX(unreported) added Vary header to try to keep server-relative redirects from getting improperly cached. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/RequestFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/RequestFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/RequestFilter.java 2010-04-05 23:25:55 UTC (rev 3027) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/RequestFilter.java 2010-04-05 23:36:47 UTC (rev 3028) @@ -148,6 +148,10 @@ // cross your fingers!!! LOGGER.info("Server-Relative-Redirect:\t" + referer + "\t" + thisPath + "\t" + finalUrl); + + // Gotta make sure this is properly cached, or + // weird things happen: + httpResponse.addHeader("Vary", "Referer"); httpResponse.sendRedirect(finalUrl); handled = true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-05 23:26:01
|
Revision: 3027 http://archive-access.svn.sourceforge.net/archive-access/?rev=3027&view=rev Author: bradtofel Date: 2010-04-05 23:25:55 +0000 (Mon, 05 Apr 2010) Log Message: ----------- TWEAK: removed unused imports Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2010-04-05 23:25:12 UTC (rev 3026) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2010-04-05 23:25:55 UTC (rev 3027) @@ -27,10 +27,8 @@ import java.util.ArrayList; import java.util.Iterator; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.util.ObjectFilter; /** * Class that provides SearchResult Filtering based on multiple Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2010-04-05 23:25:12 UTC (rev 3026) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2010-04-05 23:25:55 UTC (rev 3027) @@ 
-24,9 +24,7 @@ */ package org.archive.wayback.accesscontrol; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.util.ObjectFilter; /** * * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3026 http://archive-access.svn.sourceforge.net/archive-access/?rev=3026&view=rev Author: bradtofel Date: 2010-04-05 23:25:12 +0000 (Mon, 05 Apr 2010) Log Message: ----------- TWEAK: removed unused imports Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-04-05 23:24:09 UTC (rev 3025) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-04-05 23:25:12 UTC (rev 3026) @@ -32,7 +32,6 @@ import org.archive.util.ArchiveUtils; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.util.ObjectFilter; /** * @author brad @@ -46,7 +45,7 @@ private final static String POLICY_BLOCK = "block"; private final static String POLICY_ROBOT = "robots"; private boolean notifiedRobotSeen = false; - private boolean notifiedRobotPassed = false; +// private boolean notifiedRobotPassed = false; private boolean notifiedAdminSeen = false; private boolean notifiedAdminPassed = false; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3025 http://archive-access.svn.sourceforge.net/archive-access/?rev=3025&view=rev Author: bradtofel Date: 2010-04-05 23:24:09 +0000 (Mon, 05 Apr 2010) Log Message: ----------- TWEAK: removed unused imports Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java 2010-04-05 23:23:44 UTC (rev 3024) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java 2010-04-05 23:24:09 UTC (rev 3025) @@ -25,9 +25,7 @@ package org.archive.wayback.accesscontrol.oracleclient; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.util.ObjectFilter; /** * ExclusionFilterFactory implementation which connects to an Exclusion Oracle This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3024 http://archive-access.svn.sourceforge.net/archive-access/?rev=3024&view=rev Author: bradtofel Date: 2010-04-05 23:23:44 +0000 (Mon, 05 Apr 2010) Log Message: ----------- LOGGING tweaks Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2010-04-05 23:23:21 UTC (rev 3023) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2010-04-05 23:23:44 UTC (rev 3024) @@ -112,7 +112,7 @@ current = new ArrayList<String>(); } rules.put(ua, current); - LOGGER.info("Found User-agent(" + ua + ") rules..."); + LOGGER.trace("Found User-agent(" + ua + ") rules..."); continue; } if (read.matches("(?i)Disallow:.*")) { @@ -145,11 +145,12 @@ return false; } else { - LOGGER.info("UA(" + curUA + ") has (" + LOGGER.trace("UA(" + curUA + ") has (" + disallowedPath + ") blocked...(" + disallowedPath.length() + ")"); if (disallowedPath.equals("/") || path.startsWith(disallowedPath)) { - LOGGER.info("THIS APPLIES!!!"); + LOGGER.info("Rule(" + disallowedPath + ") applies to (" + + path + ")"); return true; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3023 http://archive-access.svn.sourceforge.net/archive-access/?rev=3023&view=rev Author: bradtofel Date: 2010-04-05 23:23:21 +0000 (Mon, 05 Apr 2010) Log Message: ----------- LOGGING tweaks Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-04-05 23:22:41 UTC (rev 3022) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-04-05 23:23:21 UTC (rev 3023) @@ -176,7 +176,7 @@ firstUrlString = urlString; } if(rulesCache.containsKey(urlString)) { - LOGGER.info("ROBOT: Cached("+urlString+")"); + LOGGER.fine("ROBOT: Cached("+urlString+")"); rules = rulesCache.get(urlString); if(!urlString.equals(firstUrlString)) { LOGGER.info("Adding extra url("+firstUrlString+") for prev cached rules("+urlString+")"); @@ -184,7 +184,7 @@ } } else { try { - LOGGER.info("ROBOT: NotCached("+urlString+")"); + LOGGER.info("ROBOT: NotCached - Downloading("+urlString+")"); tmpRules = new RobotRules(); Resource resource = webCache.getCachedResource(new URL(urlString), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-05 23:22:48
|
Revision: 3022 http://archive-access.svn.sourceforge.net/archive-access/?rev=3022&view=rev Author: bradtofel Date: 2010-04-05 23:22:41 +0000 (Mon, 05 Apr 2010) Log Message: ----------- TWEAK: removed unused imports Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2010-04-02 03:30:31 UTC (rev 3021) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2010-04-05 23:22:41 UTC (rev 3022) @@ -25,9 +25,7 @@ package org.archive.wayback.accesscontrol.remote; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.util.ObjectFilter; /** * Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2010-04-02 03:30:31 UTC (rev 3021) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2010-04-05 23:22:41 UTC (rev 3022) @@ -25,10 +25,8 @@ package org.archive.wayback.accesscontrol.robotstxt; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.util.ObjectFilter; /** * Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-04-02 03:30:31 UTC (rev 3021) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-04-05 23:22:41 UTC (rev 3022) @@ -32,11 +32,9 @@ import org.apache.log4j.Logger; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.flatfile.FlatFile; /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 03:30:38
|
Revision: 3021 http://archive-access.svn.sourceforge.net/archive-access/?rev=3021&view=rev Author: bradtofel Date: 2010-04-02 03:30:31 +0000 (Fri, 02 Apr 2010) Log Message: ----------- BUGFIX(unreported) now definitely throws an AdministrativeAccessControlException if the ExclusionFilterFactory returns null, and also attempts to shutdown() the ExclusionFilterFactory, if configured. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-02 03:28:28 UTC (rev 3020) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-02 03:30:31 UTC (rev 3021) @@ -49,12 +49,14 @@ import org.archive.wayback.core.UIResults; import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AdministrativeAccessControlException; import org.archive.wayback.exception.AuthenticationControlException; import org.archive.wayback.exception.BaseExceptionRenderer; import org.archive.wayback.exception.BetterRequestException; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.operator.BooleanOperator; import org.springframework.beans.factory.BeanNameAware; @@ -385,7 +387,12 @@ } if(exclusionFactory != null) { - wbRequest.setExclusionFilter(exclusionFactory.get()); + ExclusionFilter exclusionFilter = exclusionFactory.get(); + if(exclusionFilter == 
null) { + throw new AdministrativeAccessControlException( + "AccessControl list unavailable"); + } + wbRequest.setExclusionFilter(exclusionFilter); } // TODO: refactor this into RequestParser implementations, so a // user could alter requests to change the behavior within a @@ -483,6 +490,9 @@ if(collection != null) { collection.shutdown(); } + if(exclusionFactory != null) { + exclusionFactory.shutdown(); + } } private void logNotInArchive(WaybackException e, WaybackRequest r) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 03:28:34
|
Revision: 3020 http://archive-access.svn.sourceforge.net/archive-access/?rev=3020&view=rev Author: bradtofel Date: 2010-04-02 03:28:28 +0000 (Fri, 02 Apr 2010) Log Message: ----------- BUGFIX(unreported): AdaptedIterator implementation which converted an Iterator<CaptureSearchResult> to an Iterator<UrlSearchResult> was not returning the last record... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2010-04-02 03:19:58 UTC (rev 3019) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2010-04-02 03:28:28 UTC (rev 3020) @@ -43,7 +43,7 @@ import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; -import org.archive.wayback.resourceindex.adapters.CaptureToUrlSearchResultAdapter; +import org.archive.wayback.resourceindex.adapters.CaptureToUrlSearchResultIterator; import org.archive.wayback.resourceindex.filterfactory.AccessPointCaptureFilterGroupFactory; import org.archive.wayback.resourceindex.filterfactory.CaptureFilterGroup; import 
org.archive.wayback.resourceindex.filterfactory.CoreCaptureFilterGroupFactory; @@ -242,7 +242,7 @@ uFilters.addFilters(window.getFilters()); CloseableIterator<UrlSearchResult> itrU = new ObjectFilterIterator<UrlSearchResult>( - CaptureToUrlSearchResultAdapter.adaptCaptureIterator(itrC), + new CaptureToUrlSearchResultIterator(itrC), uFilters); while(itrU.hasNext()) { Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java 2010-04-02 03:19:58 UTC (rev 3019) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java 2010-04-02 03:28:28 UTC (rev 3020) @@ -1,115 +0,0 @@ -/* CaptureToUrlSearchResultAdapter - * - * $Id$ - * - * Created on 4:45:55 PM Jun 28, 2008. - * - * Copyright (C) 2008 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.adapters; - -import java.util.HashMap; - -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.core.UrlSearchResult; -import org.archive.wayback.util.AdaptedIterator; -import org.archive.wayback.util.Adapter; -import org.archive.wayback.util.CloseableIterator; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class CaptureToUrlSearchResultAdapter - implements Adapter<CaptureSearchResult, UrlSearchResult> { - - private String currentUrl; - private String originalUrl; - private String firstCapture; - private String lastCapture; - private int numCaptures; - private HashMap<String,Object> digests; - private UrlSearchResult resultRef = null; - public CaptureToUrlSearchResultAdapter() { - - } - private UrlSearchResult makeUrlSearchResult(CaptureSearchResult result) { - currentUrl = result.getUrlKey(); - originalUrl = result.getOriginalUrl(); - firstCapture = result.getCaptureTimestamp(); - lastCapture = firstCapture; - digests = new HashMap<String,Object>(); - digests.put(result.getDigest(),null); - numCaptures = 1; - - resultRef = new UrlSearchResult(); - resultRef.setUrlKey(currentUrl); - resultRef.setOriginalUrl(originalUrl); - resultRef.setFirstCapture(firstCapture); - resultRef.setLastCapture(lastCapture); - resultRef.setNumCaptures(1); - resultRef.setNumVersions(1); - return resultRef; - } - - /* (non-Javadoc) - * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) - */ - public UrlSearchResult adapt(CaptureSearchResult c) { - String urlKey = c.getUrlKey(); - if(resultRef == null || !currentUrl.equals(urlKey)) { - return makeUrlSearchResult(c); - } - - // same url -- accumulate into the last one we returned: - String captureDate = 
c.getCaptureTimestamp(); - if(captureDate.compareTo(firstCapture) < 0) { - firstCapture = captureDate; - resultRef.setFirstCapture(firstCapture); - } - if(captureDate.compareTo(lastCapture) > 0) { - lastCapture = captureDate; - resultRef.setLastCapture(lastCapture); - } - numCaptures++; - digests.put(c.getDigest(), null); - resultRef.setNumCaptures(numCaptures); - resultRef.setNumVersions(digests.size()); - return null; - } - public static CloseableIterator<UrlSearchResult> adaptCaptureIterator( - CloseableIterator<CaptureSearchResult> itr) { - - // HACKHACK: this is pretty lame. We return an UrlSearchResult the - // first time we see a new urlKey, and cache a reference to the returned - // UrlSearchResult, updating it as we see subsequent CaptureSearchResult - // objects with the same urlKey. - // This means that users of the returned UrlSearchResult need to wait - // until they've got the *next* returned UrlSearchResult before using - // the *previous* UrlSearchResult. - // At the moment, this all happens inside a LocalResourceIndex, so - // none of the UrlSearchResult objects should be seen/used in any - // significant way before they've all be accumulated into an - // UrlSearchResults object.. 
- return new AdaptedIterator<CaptureSearchResult,UrlSearchResult>(itr, - new CaptureToUrlSearchResultAdapter()); - } -} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java 2010-04-02 03:28:28 UTC (rev 3020) @@ -0,0 +1,138 @@ +/* CaptureToUrlSearchResultIterator + * + * $Id$: + * + * Created on Mar 31, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.adapters; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.log4j.Logger; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.UrlSearchResult; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.PeekableIterator; + +/** + * @author brad + * + */ +public class CaptureToUrlSearchResultIterator implements CloseableIterator<UrlSearchResult> { + private static final Logger LOGGER = Logger.getLogger( + CaptureToUrlSearchResultIterator.class.getName()); + private PeekableIterator<CaptureSearchResult> peek = null; + UrlSearchResult cachedNext = null; + /** + * @param itr possibly closeable iterator of CaptureSearchResult objects + */ + public CaptureToUrlSearchResultIterator(Iterator<CaptureSearchResult> itr) { + peek = new PeekableIterator<CaptureSearchResult>(itr); + } + /* (non-Javadoc) + * @see java.util.Iterator#hasNext() + */ + public boolean hasNext() { + createNext(); + return (cachedNext != null); + } + + private void createNext() { + if(cachedNext == null) { + if(peek.hasNext()) { + // populate + CaptureSearchResult captureResult = peek.next(); + String currentKey = captureResult.getUrlKey(); + String originalUrl = captureResult.getOriginalUrl(); + String firstCapture = captureResult.getCaptureTimestamp(); + LOGGER.info("Creating new UrlResult:" + currentKey + " " + + firstCapture); + String lastCapture = firstCapture; + HashMap<String,Object> digests = new HashMap<String,Object>(); + digests.put(captureResult.getDigest(),null); + int numCaptures = 1; + + cachedNext = new UrlSearchResult(); + cachedNext.setUrlKey(currentKey); + 
cachedNext.setOriginalUrl(originalUrl); + + // now rip through the rest until we find either the last + // in the iterator, or the first having a different urlKey: + while((captureResult = peek.peekNext()) != null) { + String urlKey = captureResult.getUrlKey(); + if(currentKey.equals(urlKey)) { + // remove from iterator, and accumulate: + peek.next(); + numCaptures++; + digests.put(captureResult.getDigest(), null); + + String captureTS = captureResult.getCaptureTimestamp(); + if(captureTS.compareTo(firstCapture) < 0) { + firstCapture = captureTS; + } + if(captureTS.compareTo(lastCapture) > 0) { + lastCapture = captureTS; + } + + } else { + // all done. leave the next result and stop processing: + LOGGER.info("Hit next urlKey. Cur("+currentKey+") new(" + + urlKey + ")"); + break; + } + } + cachedNext.setFirstCapture(firstCapture); + cachedNext.setLastCapture(lastCapture); + cachedNext.setNumCaptures(numCaptures); + cachedNext.setNumVersions(digests.size()); + } + } + } + /* (non-Javadoc) + * @see java.util.Iterator#next() + */ + public UrlSearchResult next() { + if(cachedNext == null) { + throw new NoSuchElementException("use hasNext!"); + } + UrlSearchResult tmp = cachedNext; + cachedNext = null; + return tmp; + } + + /* (non-Javadoc) + * @see java.util.Iterator#remove() + */ + public void remove() { + throw new UnsupportedOperationException(); + } + + /* (non-Javadoc) + * @see java.io.Closeable#close() + */ + public void close() throws IOException { + peek.close(); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3019 http://archive-access.svn.sourceforge.net/archive-access/?rev=3019&view=rev Author: bradtofel Date: 2010-04-02 03:19:58 +0000 (Fri, 02 Apr 2010) Log Message: ----------- test for null pointer before using filterGroup property Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-04-02 03:19:24 UTC (rev 3018) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-04-02 03:19:58 UTC (rev 3019) @@ -90,29 +90,39 @@ if(policy.equals(POLICY_ALLOW)) { if(!notifiedAdminSeen) { notifiedAdminSeen = true; - filterGroup.setSawAdministrative(); + if(filterGroup != null) { + filterGroup.setSawAdministrative(); + } } if(!notifiedAdminPassed) { notifiedAdminPassed = true; - filterGroup.setPassedAdministrative(); + if(filterGroup != null) { + filterGroup.setPassedAdministrative(); + } } return FILTER_INCLUDE; } else if(policy.equals(POLICY_BLOCK)) { if(!notifiedAdminSeen) { notifiedAdminSeen = true; - filterGroup.setSawAdministrative(); + if(filterGroup != null) { + filterGroup.setSawAdministrative(); + } } return FILTER_EXCLUDE; } else if(policy.equals(POLICY_ROBOT)) { if(!notifiedRobotSeen) { notifiedRobotSeen = true; - filterGroup.setSawRobots(); + if(filterGroup != null) { + filterGroup.setSawRobots(); + } } return FILTER_INCLUDE; // if(robotFilter != null) { // if(!notifiedRobotPassed) { // notifiedRobotPassed = true; -// filterGroup.setPassedRobot(); +// if(filterGroup != null) { +// 
filterGroup.setPassedRobot(); +// } // } // return robotFilter.filterObject(o); // } else { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3018 http://archive-access.svn.sourceforge.net/archive-access/?rev=3018&view=rev Author: bradtofel Date: 2010-04-02 03:19:24 +0000 (Fri, 02 Apr 2010) Log Message: ----------- test for null pointer before using filterGroup property Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-04-02 03:18:34 UTC (rev 3017) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-04-02 03:19:24 UTC (rev 3018) @@ -230,7 +230,9 @@ */ public int filterObject(CaptureSearchResult r) { if(!notifiedSeen) { - filterGroup.setSawRobots(); + if(filterGroup != null) { + filterGroup.setSawRobots(); + } notifiedSeen = true; } int filterResult = ObjectFilter.FILTER_EXCLUDE; @@ -242,7 +244,9 @@ url = new URL(ArchiveUtils.addImpliedHttpIfNecessary(resultURL)); if(!rules.blocksPathForUA(url.getPath(), userAgent)) { if(!notifiedPassed) { - filterGroup.setPassedRobots(); + if(filterGroup != null) { + filterGroup.setPassedRobots(); + } notifiedPassed = true; } filterResult = ObjectFilter.FILTER_INCLUDE; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 03:18:40
|
Revision: 3017 http://archive-access.svn.sourceforge.net/archive-access/?rev=3017&view=rev Author: bradtofel Date: 2010-04-02 03:18:34 +0000 (Fri, 02 Apr 2010) Log Message: ----------- BUGFIX: now definitely causes a complete failure of resourceIndex queries when the administrative exclusion file is missing. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-04-02 03:16:47 UTC (rev 3016) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-04-02 03:18:34 UTC (rev 3017) @@ -81,7 +81,9 @@ */ public int filterObject(CaptureSearchResult r) { if(!notifiedSeen) { - filterGroup.setSawAdministrative(); + if(filterGroup != null) { + filterGroup.setSawAdministrative(); + } notifiedSeen = true; } String url = r.getOriginalUrl(); @@ -102,7 +104,9 @@ return ObjectFilter.FILTER_EXCLUDE; } else { if(!notifiedPassed) { - filterGroup.setPassedAdministrative(); + if(filterGroup != null) { + filterGroup.setPassedAdministrative(); + } notifiedPassed = true; } return ObjectFilter.FILTER_INCLUDE; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-04-02 03:16:47 UTC (rev 3016) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-04-02 03:18:34 UTC (rev 3017) @@ -73,6 +73,9 @@ protected void reloadFile() throws IOException { long currentMod = file.lastModified(); if(currentMod == lastUpdated) { + if(currentMod == 0) { + LOGGER.error("No exclude file at " + file.getAbsolutePath()); + } return; } LOGGER.info("Reloading exclusion file " + file.getAbsolutePath()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 03:16:53
|
Revision: 3016 http://archive-access.svn.sourceforge.net/archive-access/?rev=3016&view=rev Author: bradtofel Date: 2010-04-02 03:16:47 +0000 (Fri, 02 Apr 2010) Log Message: ----------- FEATURE: added addDefaults(boolean) which allows skipping of reverse lookups to find all hostnames for the local machine. This operation can take upwards of 10 seconds, really slowing down startup/restart time. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2010-04-02 03:14:13 UTC (rev 3015) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2010-04-02 03:16:47 UTC (rev 3016) @@ -44,6 +44,7 @@ public class ProxyReplayRequestParser extends WrappedRequestParser { private List<String> localhostNames = null; + private boolean addDefaults = true; /** * @param wrapped @@ -59,7 +60,9 @@ if(localhostNames == null) { localhostNames = InetAddressUtil.getAllLocalHostNames(); } else { - localhostNames.addAll(InetAddressUtil.getAllLocalHostNames()); + if(addDefaults) { + localhostNames.addAll(InetAddressUtil.getAllLocalHostNames()); + } } } private boolean isLocalRequest(HttpServletRequest httpRequest) { @@ -104,4 +107,17 @@ this.localhostNames = localhostNames; } + /** + * @return the addDefaults + */ + public boolean isAddDefaults() { + return addDefaults; + } + + /** + * @param addDefaults the addDefaults to set + */ + public void setAddDefaults(boolean addDefaults) { + this.addDefaults 
= addDefaults; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java 2010-04-02 03:14:13 UTC (rev 3015) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java 2010-04-02 03:16:47 UTC (rev 3016) @@ -79,4 +79,17 @@ } return wbRequest; } + /** + * @return the addDefaults + */ + public boolean isAddDefaults() { + return prrp.isAddDefaults(); + } + + /** + * @param addDefaults the addDefaults to set + */ + public void setAddDefaults(boolean addDefaults) { + prrp.setAddDefaults(addDefaults); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 03:14:19
|
Revision: 3015 http://archive-access.svn.sourceforge.net/archive-access/?rev=3015&view=rev Author: bradtofel Date: 2010-04-02 03:14:13 +0000 (Fri, 02 Apr 2010) Log Message: ----------- FEATURE: added discardStream() which reads a stream till it's empty, throwing away the data read Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2010-04-02 03:12:04 UTC (rev 3014) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2010-04-02 03:14:13 UTC (rev 3015) @@ -47,6 +47,16 @@ } return true; } + + public static void discardStream(InputStream is) throws IOException { + discardStream(is,BUFFER_SIZE); + } + public static void discardStream(InputStream is,int size) throws IOException { + byte[] buffer = new byte[size]; + while(is.read(buffer, 0, size) != -1) { + } + } + /** * Write all bytes from is to os. Does not close either stream. * @param is to copy bytes from This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3014 http://archive-access.svn.sourceforge.net/archive-access/?rev=3014&view=rev Author: bradtofel Date: 2010-04-02 03:12:04 +0000 (Fri, 02 Apr 2010) Log Message: ----------- Spelling fix Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2010-04-02 03:10:33 UTC (rev 3013) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2010-04-02 03:12:04 UTC (rev 3014) @@ -30,7 +30,7 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; -import org.archive.wayback.exception.AdminstrativeAccessControlException; +import org.archive.wayback.exception.AdministrativeAccessControlException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.RobotAccessControlException; import org.archive.wayback.resourceindex.filters.CounterFilter; @@ -78,20 +78,9 @@ " is blocked by the sites robots.txt file"); } if(sawAdministrative && !passedAdministrative) { - throw new AdminstrativeAccessControlException(requestUrl + + throw new AdministrativeAccessControlException(requestUrl + " is not available in the Wayback Machine."); } - -// if(postCounter.getNumMatched() == 0) { -// -// // nothing got to the counter after exclusions. 
If we have -// // exclusions (detected by preCounter being non-null, and the -// // preCounter passed any results, then they were all filtered by -// // the exclusions filter. -// if(preCounter != null && preCounter.getNumMatched() > 0) { -// throw new AccessControlException("All results Excluded"); -// } -// } } public void setPassedRobots() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3013 http://archive-access.svn.sourceforge.net/archive-access/?rev=3013&view=rev Author: bradtofel Date: 2010-04-02 03:10:33 +0000 (Fri, 02 Apr 2010) Log Message: ----------- LOG: added log warnings if it fails to convert a line because of a numberformatexception in the offset field - which is now caught instead of throwing an exception. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2010-04-02 03:08:24 UTC (rev 3012) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2010-04-02 03:10:33 UTC (rev 3013) @@ -25,6 +25,7 @@ package org.archive.wayback.resourceindex.cdx; +import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.url.UrlOperations; @@ -37,6 +38,8 @@ */ public class CDXLineToSearchResultAdapter implements Adapter<String,CaptureSearchResult> { + private static final Logger LOGGER = Logger.getLogger( + CDXLineToSearchResultAdapter.class.getName()); private final static String SCHEME_STRING = "://"; private final static String DEFAULT_SCHEME = "http://"; @@ -103,7 +106,13 @@ } if(!tokens[nextToken].equals("-")) { - compressedOffset = Long.parseLong(tokens[nextToken]); + try { + compressedOffset = Long.parseLong(tokens[nextToken]); + } catch (NumberFormatException e) { + LOGGER.warn("Bad compressed Offset field("+nextToken+") in (" + + line +")"); + return null; + } } nextToken++; String 
fileName = tokens[nextToken]; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3012 http://archive-access.svn.sourceforge.net/archive-access/?rev=3012&view=rev Author: bradtofel Date: 2010-04-02 03:08:24 +0000 (Fri, 02 Apr 2010) Log Message: ----------- BUGFIX: no longer filters results unless they redirect to the same scheme. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2010-04-02 03:07:20 UTC (rev 3011) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2010-04-02 03:08:24 UTC (rev 3012) @@ -29,6 +29,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * SearchResultFilter which INCLUDEs all records, unless they redirect to @@ -59,7 +60,14 @@ try { String redirectKey = canonicalizer.urlStringToKey(redirect); if(redirectKey.compareTo(urlKey) == 0) { - return FILTER_EXCLUDE; + // only omit if same scheme: + String origScheme = + UrlOperations.urlToScheme(r.getOriginalUrl()); + String redirScheme = + UrlOperations.urlToScheme(redirect); + if(origScheme.compareTo(redirScheme) == 0) { + return FILTER_EXCLUDE; + } } } catch (URIException e) { // emit message (is that right?) and continue This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3011 http://archive-access.svn.sourceforge.net/archive-access/?rev=3011&view=rev Author: bradtofel Date: 2010-04-02 03:07:20 +0000 (Fri, 02 Apr 2010) Log Message: ----------- LOGGING: added a log warning if the guardrail filter kicks in. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java 2010-04-02 03:06:28 UTC (rev 3010) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java 2010-04-02 03:07:20 UTC (rev 3011) @@ -24,6 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; +import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; @@ -35,6 +36,8 @@ * @version $Date$, $Revision$ */ public class GuardRailFilter implements ObjectFilter<CaptureSearchResult> { + private static final Logger LOGGER = Logger.getLogger( + GuardRailFilter.class.getName()); private int maxRecordsToScan = 0; private int recordsScanned = 0; @@ -52,6 +55,8 @@ public int filterObject(CaptureSearchResult r) { recordsScanned++; if(recordsScanned > maxRecordsToScan) { + LOGGER.warn("Hit max requests on " + r.getUrlKey() + " " + + r.getCaptureTimestamp()); return FILTER_ABORT; } return FILTER_INCLUDE; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 03:06:34
|
Revision: 3010 http://archive-access.svn.sourceforge.net/archive-access/?rev=3010&view=rev Author: bradtofel Date: 2010-04-02 03:06:28 +0000 (Fri, 02 Apr 2010) Log Message: ----------- Many unreported bugfixes, slight change of interface to allow grabbing an iterator of String(lines), added a main() method, and added a truncated() method to the iterators, currently not exposed enough to be useful, but potentially allowing an external user to determine if the search was cut off because too many blocks had to be searched. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java 2010-04-02 02:53:44 UTC (rev 3009) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java 2010-04-02 03:06:28 UTC (rev 3010) @@ -39,10 +39,17 @@ Iterator<String> inner = null; private String cachedNext = null; private boolean done = false; + private boolean truncated = false; public StringPrefixIterator(Iterator<String> inner, String prefix) { this.prefix = prefix; this.inner = inner; + if(inner instanceof ZiplinesChunkIterator) { + truncated = 
((ZiplinesChunkIterator)inner).isTruncated(); + } } + public boolean isTruncated() { + return truncated; + } /* (non-Javadoc) * @see java.util.Iterator#hasNext() */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-04-02 02:53:44 UTC (rev 3009) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-04-02 03:06:28 UTC (rev 3010) @@ -32,11 +32,16 @@ import java.net.URLConnection; import java.util.zip.GZIPInputStream; +import org.apache.log4j.Logger; + /** * @author brad * */ public class ZiplinedBlock { + private static final Logger LOGGER = Logger.getLogger( + ZiplinedBlock.class.getName()); + String urlOrPath = null; long offset = -1; public final static int BLOCK_SIZE = 128 * 1024; @@ -56,11 +61,13 @@ * @throws IOException for usual reasons */ public BufferedReader readBlock() throws IOException { - URL u = new URL(urlOrPath); - URLConnection uc = u.openConnection(); StringBuilder sb = new StringBuilder(16); sb.append(BYTES_HEADER).append(offset).append(BYTES_MINUS); sb.append((offset + BLOCK_SIZE)-1); + LOGGER.trace("Reading block:" + urlOrPath + "("+sb.toString()+")"); + // TODO: timeouts + URL u = new URL(urlOrPath); + URLConnection uc = u.openConnection(); uc.setRequestProperty(RANGE_HEADER, sb.toString()); return new BufferedReader(new InputStreamReader( new GZIPInputStream(uc.getInputStream()))); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2010-04-02 02:53:44 UTC (rev 3009) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2010-04-02 03:06:28 UTC (rev 3010) @@ -37,20 +37,27 @@ import java.util.RandomAccess; import java.util.zip.GZIPInputStream; +import org.apache.log4j.Logger; import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.webapp.AccessPoint; /** * @author brad * */ public class ZiplinesChunkIterator implements CloseableIterator<String> { + private static final Logger LOGGER = Logger.getLogger( + ZiplinesChunkIterator.class.getName()); + private BufferedReader br = null; private Iterator<ZiplinedBlock> blockItr = null; private String cachedNext = null; + private boolean truncated = false; /** * @param blocks which should be fetched and unzipped, one after another */ public ZiplinesChunkIterator(List<ZiplinedBlock> blocks) { + LOGGER.info("initialized with " + blocks.size() + " blocks"); blockItr = blocks.iterator(); } /* (non-Javadoc) @@ -148,4 +155,16 @@ System.exit(1); } } + /** + * @return the truncated + */ + public boolean isTruncated() { + return truncated; + } + /** + * @param truncated the truncated to set + */ + public void setTruncated(boolean truncated) { + this.truncated = truncated; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-04-02 02:53:44 UTC (rev 3009) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-04-02 
03:06:28 UTC (rev 3010) @@ -28,8 +28,10 @@ import it.unimi.dsi.mg4j.util.FrontCodedStringList; import java.io.BufferedReader; +import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -82,6 +84,7 @@ private String chunkMapPath = null; private HashMap<String,String> chunkMap = null; private CDXFormat format = null; + private int maxBlocks = 1000; public ZiplinesSearchResultSource() { } @@ -130,40 +133,51 @@ } } - public Iterator<String> getStringPrefixIterator(String prefix) throws ResourceIndexNotAvailableException, IOException { - CloseableIterator<String> itr = chunkIndex.getRecordIteratorLT(prefix); + public Iterator<String> getStringPrefixIterator(String prefix) + throws ResourceIndexNotAvailableException, IOException { + ArrayList<ZiplinedBlock> blocks = new ArrayList<ZiplinedBlock>(); boolean first = true; - while(itr.hasNext()) { - String blockDescriptor = itr.next(); - String parts[] = blockDescriptor.split("\t"); - if(parts.length != 3) { - throw new ResourceIndexNotAvailableException("Bad line(" + - blockDescriptor + ")"); + int numBlocks = 0; + boolean truncated = false; + CloseableIterator<String> itr = null; + try { + itr = chunkIndex.getRecordIteratorLT(prefix); + while(itr.hasNext()) { + if(numBlocks >= maxBlocks) { + truncated = true; + break; + } + String blockDescriptor = itr.next(); + numBlocks++; + String parts[] = blockDescriptor.split("\t"); + if(parts.length != 3) { + throw new ResourceIndexNotAvailableException("Bad line(" + + blockDescriptor + ")"); + } + // only compare the correct length: + String prefCmp = prefix; + String blockCmp = parts[0]; + if(first) { + // always add first: + first = false; + } else if(!blockCmp.startsWith(prefCmp)) { + // all done; + break; + } + // add this and keep lookin... 
+ String url = chunkMap.get(parts[1]); + long offset = Long.parseLong(parts[2]); + blocks.add(new ZiplinedBlock(url, offset)); } - // only compare the correct length: - String prefCmp = prefix; - String blockCmp = parts[0]; -// if(prefCmp.length() < blockCmp.length()) { -// blockCmp = blockCmp.substring(0,prefCmp.length()); -// } else { -// prefCmp = prefCmp.substring(0,blockCmp.length()); -// } - if(first) { - // always add first: - first = false; -// } else if(blockCmp.compareTo(prefCmp) > 0) { - } else if(!blockCmp.startsWith(prefCmp)) { - // all done; - break; + } finally { + if(itr != null) { + itr.close(); } - // add this and keep lookin... - String url = chunkMap.get(parts[1]); - long offset = Long.parseLong(parts[2]); - blocks.add(new ZiplinedBlock(url, offset)); } - itr.close(); - return new StringPrefixIterator(new ZiplinesChunkIterator(blocks),prefix); + ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks); + zci.setTruncated(truncated); + return new StringPrefixIterator(zci,prefix); } /* (non-Javadoc) @@ -216,5 +230,103 @@ public void setChunkMapPath(String chunkMapPath) { this.chunkMapPath = chunkMapPath; } + /** + * @return the maxBlocks + */ + public int getMaxBlocks() { + return maxBlocks; + } + /** + * @param maxBlocks the maxBlocks to set + */ + public void setMaxBlocks(int maxBlocks) { + this.maxBlocks = maxBlocks; + } + private static void USAGE() { + System.err.println("USAGE:"); + System.err.println(""); + System.err.println("zl-bin-search [-format FORMAT] [-max MAX_BLOCKS] SUMMARY LOCATION KEY"); + System.err.println(""); + System.err.println("Search a ziplined compressed CDX format index for key"); + System.err.println("KEY to STDOUT. 
SUMMARY and LOCATION are paths to the"); + System.err.println("block summary and file location files."); + System.err.println("With -format, output CDX in format FORMAT."); + System.err.println("With -max, limit search at most MAX_BLOCKS blocks."); + System.exit(1); + } + + /** + * @param args + */ + public static void main(String[] args) { +// String cdxSpec = CDXFormatIndex.CDX_HEADER_MAGIC; + String cdxSpec = " CDX N b a m s k r V g"; + CDXFormat format = null; + try { + format = new CDXFormat(cdxSpec); + } catch (CDXFormatException e1) { + e1.printStackTrace(); + System.exit(1); + } + ZiplinesSearchResultSource zl = new ZiplinesSearchResultSource(format); + PrintWriter pw = new PrintWriter(System.out); + int idx; + for(idx = 0; idx < args.length; idx++) { + if(args[idx].equals("-format")) { + idx++; + if(idx >= args.length) { + USAGE(); + } + try { + zl.setFormat(new CDXFormat(args[idx])); + } catch (CDXFormatException e1) { + e1.printStackTrace(); + System.exit(1); + } + } else if(args[idx].equals("-max")) { + idx++; + if(idx >= args.length) { + USAGE(); + } + try { + zl.setMaxBlocks(Integer.parseInt(args[idx])); + } catch(NumberFormatException e) { + USAGE(); + System.exit(1); + } + + } else { + break; + } + } + if(args.length < idx + 3) { + USAGE(); + } + // first is summary path, then location path, then search key: + zl.setChunkIndexPath(args[idx++]); + zl.setChunkMapPath(args[idx++]); + String key = args[idx++]; + + try { + zl.init(); + Iterator<String> itr = zl.getStringPrefixIterator(key); + boolean truncated = ((StringPrefixIterator)itr).isTruncated(); + while(itr.hasNext()) { + pw.println(itr.next()); + } + pw.close(); + if(truncated) { + System.err.println("Note that results are truncated..."); + } + } catch (ResourceIndexNotAvailableException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + System.exit(1); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + System.exit(1); + } + } } This was 
sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-02 02:53:50
|
Revision: 3009 http://archive-access.svn.sourceforge.net/archive-access/?rev=3009&view=rev Author: bradtofel Date: 2010-04-02 02:53:44 +0000 (Fri, 02 Apr 2010) Log Message: ----------- TWEAK: changed DatabaseException to IOException on initialization failure of BDBRecordSet and callers Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2010-03-30 22:34:57 UTC (rev 3008) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2010-04-02 02:53:44 UTC (rev 3009) @@ -62,7 +62,7 @@ * @throws DatabaseException * @throws ConfigurationException */ - public void init() throws DatabaseException, ConfigurationException { + public void init() throws IOException, ConfigurationException { initializeDB(bdbPath,bdbName); } @@ -151,7 +151,7 @@ UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); try { index.initializeDB(path,name); - } catch (DatabaseException e) { + } catch (IOException e) { e.printStackTrace(); System.exit(1); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2010-03-30 22:34:57 UTC (rev 3008) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2010-04-02 02:53:44 UTC (rev 3009) @@ -87,11 +87,7 @@ public void init() throws IOException { bdb = new BDBRecordSet(); - try { - bdb.initializeDB(bdbPath,bdbName); - } catch (DatabaseException e) { - throw wrapDBException(e); - } + bdb.initializeDB(bdbPath,bdbName); if(logPath == null) { throw new IOException("No logPath"); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java 2010-03-30 22:34:57 UTC (rev 3008) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java 2010-04-02 02:53:44 UTC (rev 3009) @@ -25,6 +25,7 @@ package org.archive.wayback.util.bdb; import java.io.File; +import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Iterator; @@ -74,10 +75,10 @@ /** * @param thePath Directory where BDBJE files are stored * @param theDbName Name of files in thePath - * @throws DatabaseException + * @throws IOException for usual reasons, plus as database exceptions */ public void initializeDB(final String thePath, final String theDbName) - throws DatabaseException { + throws IOException { path = thePath; dbName = theDbName; @@ -88,7 +89,7 @@ File file = new File(path); if(!file.isDirectory()) { if(!file.mkdirs()) { - throw new DatabaseException("failed mkdirs(" + path + ")"); + throw new IOException("failed mkdirs(" + path + ")"); } } env = new Environment(file, environmentConfig); This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-03-30 22:35:03
|
Revision: 3008 http://archive-access.svn.sourceforge.net/archive-access/?rev=3008&view=rev Author: bradtofel Date: 2010-03-30 22:34:57 +0000 (Tue, 30 Mar 2010) Log Message: ----------- BUGFIX: now using (hopefully) correct resolving code: UURIFactory.resolve(UURI,String) instead of UURI.resolve(String,true)... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-03-29 21:50:42 UTC (rev 3007) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-03-30 22:34:57 UTC (rev 3008) @@ -100,7 +100,7 @@ url = url.substring(0,hashIdx); } try { - return baseUrl.resolve(url,true).toString() + frag; + return UURIFactory.getInstance(baseUrl, url).toString() + frag; } catch (URIException e) { e.printStackTrace(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-03-29 21:50:48
|
Revision: 3007 http://archive-access.svn.sourceforge.net/archive-access/?rev=3007&view=rev Author: bradtofel Date: 2010-03-29 21:50:42 +0000 (Mon, 29 Mar 2010) Log Message: ----------- TWEAK: speling problem Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdministrativeAccessControlException.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdministrativeAccessControlException.java (from rev 3006, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdministrativeAccessControlException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdministrativeAccessControlException.java 2010-03-29 21:50:42 UTC (rev 3007) @@ -0,0 +1,40 @@ +/* AdministrativeAccessControlException + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.exception; + +/** + * @author brad + * + */ +public class AdministrativeAccessControlException extends AccessControlException { + + /** + * @param message + */ + public AdministrativeAccessControlException(String message) { + super("Blocked Site Error",message); + } +} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java 2010-03-26 03:22:05 UTC (rev 3006) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java 2010-03-29 21:50:42 UTC (rev 3007) @@ -1,40 +0,0 @@ -/* AdminstrativeAccessControlException - * - * $Id$: - * - * Created on Mar 25, 2010. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -package org.archive.wayback.exception; - -/** - * @author brad - * - */ -public class AdminstrativeAccessControlException extends AccessControlException { - - /** - * @param message - */ - public AdminstrativeAccessControlException(String message) { - super("Blocked Site Error",message); - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-03-26 03:22:11
|
Revision: 3006 http://archive-access.svn.sourceforge.net/archive-access/?rev=3006&view=rev Author: bradtofel Date: 2010-03-26 03:22:05 +0000 (Fri, 26 Mar 2010) Log Message: ----------- FEATURE: Added new subclasses of AccessControlException, Robot and Administrative... Required defining the new abstract ExclusionFilter, which is aware of the ExclusionFilterGroup, so it can apprise the filtergroup of blocked and passed robots and admin filtering. All Exclusion-related Filters now have to extend the ExclusionFilter.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/CompositeExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -29,6 +29,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -54,7 +55,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { Iterator<ExclusionFilterFactory> 
itr = factories.iterator(); CompositeExclusionFilter filter = new CompositeExclusionFilter(); while(itr.hasNext()) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -25,6 +25,7 @@ package org.archive.wayback.accesscontrol; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** * @@ -37,7 +38,7 @@ * @return an ObjectFilter object that filters records based on * some set of exclusion rules */ - public ObjectFilter<CaptureSearchResult> get(); + public ExclusionFilter get(); /** * close any resources used by this ExclusionFilter system. 
*/ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -31,21 +31,25 @@ import org.archive.accesscontrol.RuleOracleUnavailableException; import org.archive.util.ArchiveUtils; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** * @author brad * */ -public class OracleExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class OracleExclusionFilter extends ExclusionFilter { AccessControlClient client = null; private String accessGroup = null; private final static String POLICY_ALLOW = "allow"; private final static String POLICY_BLOCK = "block"; private final static String POLICY_ROBOT = "robots"; + private boolean notifiedRobotSeen = false; + private boolean notifiedRobotPassed = false; + private boolean notifiedAdminSeen = false; + private boolean notifiedAdminPassed = false; - /** * @param oracleUrl String URL prefix for the Oracle HTTP server * @param accessGroup String group to use with requests to the Oracle @@ -84,12 +88,32 @@ accessGroup); if(policy != null) { if(policy.equals(POLICY_ALLOW)) { + if(!notifiedAdminSeen) { + notifiedAdminSeen = true; + filterGroup.setSawAdministrative(); + } + if(!notifiedAdminPassed) { + notifiedAdminPassed = true; + filterGroup.setPassedAdministrative(); + } return FILTER_INCLUDE; } else if(policy.equals(POLICY_BLOCK)) { + if(!notifiedAdminSeen) { + notifiedAdminSeen = true; + 
filterGroup.setSawAdministrative(); + } return FILTER_EXCLUDE; } else if(policy.equals(POLICY_ROBOT)) { + if(!notifiedRobotSeen) { + notifiedRobotSeen = true; + filterGroup.setSawRobots(); + } return FILTER_INCLUDE; // if(robotFilter != null) { +// if(!notifiedRobotPassed) { +// notifiedRobotPassed = true; +// filterGroup.setPassedRobot(); +// } // return robotFilter.filterObject(o); // } else { // return FILTER_EXCLUDE; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/oracleclient/OracleExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -26,6 +26,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -40,7 +41,7 @@ private String accessGroup = null; private String proxyHostPort = null; - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { OracleExclusionFilter filter = new OracleExclusionFilter(oracleUrl, accessGroup, proxyHostPort); return filter; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -33,7 +33,7 @@ import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.util.ObjectFilter; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; /** * SearchResultFilter which uses remote access control/exclusion service to @@ -43,7 +43,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class RemoteExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class RemoteExclusionFilter extends ExclusionFilter { private static final Logger LOGGER = Logger.getLogger(RemoteExclusionFilter.class .getName()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -26,6 +26,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -44,7 +45,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { return new RemoteExclusionFilter(exclusionUrlPrefix, exclusionUserAgent); } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -41,6 +41,7 @@ import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.LiveWebCacheUnavailableException; import org.archive.wayback.liveweb.LiveWebCache; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -58,7 +59,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class RobotExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class RobotExclusionFilter extends ExclusionFilter { private final static Logger LOGGER = Logger.getLogger(RobotExclusionFilter.class.getName()); @@ -74,6 +75,8 @@ private String userAgent = null; private StringBuilder sb = null; private final static RobotRules emptyRules = new RobotRules(); + private boolean notifiedSeen = false; + private boolean notifiedPassed = false; /** * Construct a new RobotExclusionFilter that uses webCache to pull @@ -226,7 +229,10 @@ * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { - + if(!notifiedSeen) { + filterGroup.setSawRobots(); + notifiedSeen = true; + } int filterResult = ObjectFilter.FILTER_EXCLUDE; RobotRules rules = getRules(r); if(rules != null) { @@ -235,6 +241,10 @@ try { url = new URL(ArchiveUtils.addImpliedHttpIfNecessary(resultURL)); if(!rules.blocksPathForUA(url.getPath(), userAgent)) { 
+ if(!notifiedPassed) { + filterGroup.setPassedRobots(); + notifiedPassed = true; + } filterResult = ObjectFilter.FILTER_INCLUDE; LOGGER.fine("ROBOT: ALLOWED("+resultURL+")"); } else { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -27,6 +27,7 @@ import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.liveweb.LiveWebCache; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; /** @@ -44,7 +45,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { return new RobotExclusionFilter(webCache,userAgent,maxCacheMS); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -30,6 +30,7 @@ import org.apache.commons.httpclient.URIException; import 
org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.ObjectFilter; @@ -39,12 +40,14 @@ * @author brad * @version $Date$, $Revision$ */ -public class StaticMapExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class StaticMapExclusionFilter extends ExclusionFilter { private static final Logger LOGGER = Logger.getLogger( StaticMapExclusionFilter.class.getName()); private String lastChecked = null; private boolean lastCheckedExcluded = false; + private boolean notifiedSeen = false; + private boolean notifiedPassed = false; Map<String,Object> exclusionMap = null; /** * @param map where each String key is a SURT that is blocked. @@ -77,18 +80,33 @@ * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { + if(!notifiedSeen) { + filterGroup.setSawAdministrative(); + notifiedSeen = true; + } String url = r.getOriginalUrl(); if(lastChecked != null) { if(lastChecked.equals(url)) { - return lastCheckedExcluded ? - ObjectFilter.FILTER_EXCLUDE : - ObjectFilter.FILTER_INCLUDE; + if(lastCheckedExcluded) { + return ObjectFilter.FILTER_EXCLUDE; + } else { + // don't need to: already did last time... + //filterGroup.setPassedAdministrative(); + return ObjectFilter.FILTER_INCLUDE; + } } } lastChecked = url; lastCheckedExcluded = isExcluded(url); - return lastCheckedExcluded ? 
- ObjectFilter.FILTER_EXCLUDE : - ObjectFilter.FILTER_INCLUDE; + if(lastCheckedExcluded) { + return ObjectFilter.FILTER_EXCLUDE; + } else { + if(!notifiedPassed) { + filterGroup.setPassedAdministrative(); + notifiedPassed = true; + } + return ObjectFilter.FILTER_INCLUDE; + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -33,6 +33,7 @@ import org.apache.log4j.Logger; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; @@ -110,7 +111,7 @@ * @return ObjectFilter which blocks CaptureSearchResults in the * exclusion file. 
*/ - public ObjectFilter<CaptureSearchResult> get() { + public ExclusionFilter get() { if(currentMap == null) { return null; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -36,6 +36,7 @@ import javax.servlet.http.HttpServletRequest; import org.archive.wayback.requestparser.OpenSearchRequestParser; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourceindex.filters.HostMatchFilter; import org.archive.wayback.resourceindex.filters.SchemeMatchFilter; import org.archive.wayback.util.ObjectFilter; @@ -84,7 +85,7 @@ * resultFilters, if these filters redact all results, then an * AccessControlException will be thrown. */ - private ObjectFilter<CaptureSearchResult> exclusionFilter = null; + private ExclusionFilter exclusionFilter = null; /** * custom CaptureSearchResult Filter to use for this specific request. 
Can @@ -486,11 +487,11 @@ this.accessPoint = accessPoint; } - public ObjectFilter<CaptureSearchResult> getExclusionFilter() { + public ExclusionFilter getExclusionFilter() { return exclusionFilter; } - public void setExclusionFilter(ObjectFilter<CaptureSearchResult> exclusionFilter) { + public void setExclusionFilter(ExclusionFilter exclusionFilter) { this.exclusionFilter = exclusionFilter; } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -0,0 +1,40 @@ +/* AdminstrativeAccessControlException + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.exception; + +/** + * @author brad + * + */ +public class AdminstrativeAccessControlException extends AccessControlException { + + /** + * @param message + */ + public AdminstrativeAccessControlException(String message) { + super("Blocked Site Error",message); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/AdminstrativeAccessControlException.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -0,0 +1,40 @@ +/* RobotAccessControlException + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.exception; + +/** + * @author brad + * + */ +public class RobotAccessControlException extends AccessControlException { + + /** + * @param message + */ + public RobotAccessControlException(String message) { + super("Blocked By Robots",message); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RobotAccessControlException.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -30,8 +30,11 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; +import org.archive.wayback.exception.AdminstrativeAccessControlException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.exception.RobotAccessControlException; import org.archive.wayback.resourceindex.filters.CounterFilter; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; @@ -41,23 +44,27 @@ private 
CounterFilter preCounter = null; private CounterFilter postCounter = null; String requestUrl = null; + private boolean sawRobots = false; + private boolean passedRobots = false; + private boolean sawAdministrative = false; + private boolean passedAdministrative = false; public ExclusionCaptureFilterGroup(WaybackRequest request) { // checks an exclusion service for every matching record - ObjectFilter<CaptureSearchResult> exclusion = - request.getExclusionFilter(); + ExclusionFilter exclusion = request.getExclusionFilter(); chain = new ObjectFilterChain<CaptureSearchResult>(); if(exclusion != null) { - preCounter = new CounterFilter(); - // count how many results got to the ExclusionFilter: - chain.addFilter(preCounter); + exclusion.setFilterGroup(this); +// preCounter = new CounterFilter(); +// // count how many results got to the ExclusionFilter: +// chain.addFilter(preCounter); chain.addFilter(exclusion); // count how many results got past the ExclusionFilter: requestUrl = request.getRequestUrl(); } - postCounter = new CounterFilter(); - chain.addFilter(postCounter); +// postCounter = new CounterFilter(); +// chain.addFilter(postCounter); } public List<ObjectFilter<CaptureSearchResult>> getFilters() { @@ -66,20 +73,38 @@ public void annotateResults(SearchResults results) throws AccessControlException, ResourceNotInArchiveException { - if(postCounter.getNumMatched() == 0) { + if(sawRobots && !passedRobots) { + throw new RobotAccessControlException("The URL " + requestUrl + + " is blocked by the sites robots.txt file"); + } + if(sawAdministrative && !passedAdministrative) { + throw new AdminstrativeAccessControlException(requestUrl + + " is not available in the Wayback Machine."); + } - // nothing got to the counter after exclusions. If we have - // exclusions (detected by preCounter being non-null, and the - // preCounter passed any results, then they were all filtered by - // the exclusions filter. 
- if(preCounter != null && preCounter.getNumMatched() > 0) { - throw new AccessControlException("All results Excluded"); - } - ResourceNotInArchiveException e = - new ResourceNotInArchiveException("the URL " + requestUrl - + " is not in the archive."); - e.setCloseMatches(results.getCloseMatches()); - throw e; - } +// if(postCounter.getNumMatched() == 0) { +// +// // nothing got to the counter after exclusions. If we have +// // exclusions (detected by preCounter being non-null, and the +// // preCounter passed any results, then they were all filtered by +// // the exclusions filter. +// if(preCounter != null && preCounter.getNumMatched() > 0) { +// throw new AccessControlException("All results Excluded"); +// } +// } } + + public void setPassedRobots() { + passedRobots = true; + } + public void setSawRobots() { + sawRobots = true; + } + + public void setPassedAdministrative() { + passedAdministrative = true; + } + public void setSawAdministrative() { + sawAdministrative = true; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/WindowFilterGroup.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -29,6 +29,7 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.resourceindex.LocalResourceIndex; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; @@ 
-42,9 +43,10 @@ ObjectFilterChain<T> windowFilters; WindowStartFilter<T> startFilter; WindowEndFilter<T> endFilter; + private String requestUrl = null; public WindowFilterGroup(WaybackRequest request, LocalResourceIndex index) throws BadQueryException { - + requestUrl = request.getRequestUrl(); windowFilters = new ObjectFilterChain<T>(); // first grab all the info from the WaybackRequest, and validate it: resultsPerPage = request.getResultsPerPage(); @@ -71,13 +73,24 @@ } public void annotateResults(SearchResults results) - throws BadQueryException { + throws BadQueryException, ResourceNotInArchiveException { results.setFirstReturned(startResult); results.setNumRequested(resultsPerPage); + int startSeen = startFilter.getNumSeen(); + if(startSeen == 0) { + ResourceNotInArchiveException e = + new ResourceNotInArchiveException("the URL " + requestUrl + + " is not in the archive."); + e.setCloseMatches(results.getCloseMatches()); + throw e; + } + int numSeen = endFilter.getNumSeen(); if(numSeen == 0) { throw new BadQueryException("No results in requested window"); } + + // how many went by the filters: results.setMatchingCount(startFilter.getNumSeen()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2010-03-24 01:08:53 UTC (rev 3005) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -28,6 +28,7 @@ import java.util.Iterator; import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filterfactory.ExclusionCaptureFilterGroup; import org.archive.wayback.util.ObjectFilter; /** @@ -38,22 +39,29 @@ * @author 
brad * @version $Date$, $Revision$ */ -public class CompositeExclusionFilter implements ObjectFilter<CaptureSearchResult> { +public class CompositeExclusionFilter extends ExclusionFilter { + //implements ObjectFilter<CaptureSearchResult> { - private ArrayList<ObjectFilter<CaptureSearchResult>> filters = - new ArrayList<ObjectFilter<CaptureSearchResult>>(); + private ArrayList<ExclusionFilter> filters = + new ArrayList<ExclusionFilter>(); /** * @param filter to be added to the composite. */ - public void addComponent(ObjectFilter<CaptureSearchResult> filter) { + public void addComponent(ExclusionFilter filter) { filters.add(filter); } + public void setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { + this.filterGroup = filterGroup; + for(ExclusionFilter filter : filters) { + filter.setFilterGroup(filterGroup); + } + } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { - Iterator<ObjectFilter<CaptureSearchResult>> itr = filters.iterator(); + Iterator<ExclusionFilter> itr = filters.iterator(); while(itr.hasNext()) { ObjectFilter<CaptureSearchResult> filter = itr.next(); if(filter == null) { Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java 2010-03-26 03:22:05 UTC (rev 3006) @@ -0,0 +1,41 @@ +/* ExclusionFilter + * + * $Id$: + * + * Created on Mar 25, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. 
+ * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.filters; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourceindex.filterfactory.ExclusionCaptureFilterGroup; +import org.archive.wayback.util.ObjectFilter; + +/** + * @author brad + * + */ +public abstract class ExclusionFilter implements ObjectFilter<CaptureSearchResult> { + protected ExclusionCaptureFilterGroup filterGroup; + public void setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { + this.filterGroup = filterGroup; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |