You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
Revision: 3405 http://archive-access.svn.sourceforge.net/archive-access/?rev=3405&view=rev Author: bradtofel Date: 2011-02-06 14:49:24 +0000 (Sun, 06 Feb 2011) Log Message: ----------- FEATURE: Exposed connect and socket timeouts, which are now caught, capturing IllegalArugmentException on GetMethod.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java 2011-02-06 14:48:05 UTC (rev 3404) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java 2011-02-06 14:49:24 UTC (rev 3405) @@ -22,9 +22,12 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.net.ConnectException; +import java.net.SocketTimeoutException; import java.net.URL; +import java.util.logging.Logger; import java.util.zip.GZIPInputStream; +import org.apache.commons.httpclient.ConnectTimeoutException; import org.apache.commons.httpclient.HostConfiguration; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpMethod; @@ -43,10 +46,13 @@ * */ public class RemoteLiveWebCache implements LiveWebCache { + private static final Logger LOGGER = Logger.getLogger( + RemoteLiveWebCache.class.getName()); private MultiThreadedHttpConnectionManager connectionManager = null; private HostConfiguration hostConfiguration = null; private HttpClient http = null; + /** * */ @@ -57,14 +63,21 @@ http.setHostConfiguration(hostConfiguration); } - /* (non-Javadoc) + /* (non-Javadoc) * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean) */ public Resource 
getCachedResource(URL url, long maxCacheMS, boolean bUseOlder) throws LiveDocumentNotAvailableException, LiveWebCacheUnavailableException, IOException { String urlString = url.toExternalForm(); - HttpMethod method = new GetMethod(urlString); + HttpMethod method = null; + try { + method = new GetMethod(urlString); + } catch(IllegalArgumentException e) { + LOGGER.warning("Bad URL for live web fetch:" + urlString); + throw new LiveDocumentNotAvailableException("Url:" + urlString + + "does not look like an URL?"); + } try { int status = http.executeMethod(method); if(status == 200) { @@ -84,9 +97,16 @@ } } catch (ResourceNotAvailableException e) { throw new LiveDocumentNotAvailableException(urlString); + } catch (ConnectException e) { throw new LiveWebCacheUnavailableException(e.getLocalizedMessage() + " : " + urlString); + } catch (SocketTimeoutException e) { + throw new LiveWebCacheUnavailableException(e.getLocalizedMessage() + + " : " + urlString); + } catch(ConnectTimeoutException e) { + throw new LiveWebCacheUnavailableException(e.getLocalizedMessage() + + " : " + urlString); } finally { method.releaseConnection(); } @@ -127,4 +147,31 @@ connectionManager.getParams(). 
setMaxConnectionsPerHost(hostConfiguration, maxHostConnections); } + /** + * @return the connectionTimeoutMS + */ + public int getConnectionTimeoutMS() { + return connectionManager.getParams().getConnectionTimeout(); + } + + /** + * @param connectionTimeoutMS the connectionTimeoutMS to set + */ + public void setConnectionTimeoutMS(int connectionTimeoutMS) { + connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS); + } + + /** + * @return the socketTimeoutMS + */ + public int getSocketTimeoutMS() { + return connectionManager.getParams().getSoTimeout(); + } + + /** + * @param socketTimeoutMS the socketTimeoutMS to set + */ + public void setSocketTimeoutMS(int socketTimeoutMS) { + connectionManager.getParams().setSoTimeout(socketTimeoutMS); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:48:11
|
Revision: 3404 http://archive-access.svn.sourceforge.net/archive-access/?rev=3404&view=rev Author: bradtofel Date: 2011-02-06 14:48:05 +0000 (Sun, 06 Feb 2011) Log Message: ----------- FEATURE: expanded rewrite and passthrough capabilities to use a Map Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/RedirectRewritingHttpHeaderProcessor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/XArchiveHttpHeaderProcessor.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java 2011-02-06 14:46:02 UTC (rev 3403) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java 2011-02-06 14:48:05 UTC (rev 3404) @@ -51,6 +51,11 @@ public final static String HTTP_CONTENT_TYPE_HEADER = "Content-Type"; public final static String HTTP_CONTENT_TYPE_HEADER_UP = HTTP_CONTENT_TYPE_HEADER.toUpperCase(); + + public final static String HTTP_CONTENT_DISP_HEADER = "Content-Disposition"; + public final static String HTTP_CONTENT_DISP_HEADER_UP = + HTTP_CONTENT_DISP_HEADER.toUpperCase(); + /** * optionally add header key:value to output for later returning to client Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/RedirectRewritingHttpHeaderProcessor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/RedirectRewritingHttpHeaderProcessor.java 2011-02-06 14:46:02 UTC (rev 3403) 
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/RedirectRewritingHttpHeaderProcessor.java 2011-02-06 14:48:05 UTC (rev 3404) @@ -19,7 +19,9 @@ */ package org.archive.wayback.replay; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.CaptureSearchResult; @@ -36,6 +38,19 @@ private static String DEFAULT_PREFIX = null; private String prefix = DEFAULT_PREFIX; + private Set<String> passThroughHeaders = null; + private Set<String> rewriteHeaders = null; + + public RedirectRewritingHttpHeaderProcessor() { + passThroughHeaders = new HashSet<String>(); + passThroughHeaders.add(HTTP_CONTENT_TYPE_HEADER_UP); + passThroughHeaders.add(HTTP_CONTENT_DISP_HEADER_UP); + + rewriteHeaders = new HashSet<String>(); + rewriteHeaders.add(HTTP_LOCATION_HEADER_UP); + rewriteHeaders.add(HTTP_CONTENT_LOCATION_HEADER_UP); + rewriteHeaders.add(HTTP_CONTENT_BASE_HEADER_UP); + } public String getPrefix() { return prefix; @@ -65,9 +80,10 @@ } // rewrite Location header URLs - if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || - keyUp.startsWith(HTTP_CONTENT_LOCATION_HEADER_UP) || - keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { + if(rewriteHeaders.contains(keyUp)) { +// if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || +// keyUp.startsWith(HTTP_CONTENT_LOCATION_HEADER_UP) || +// keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { String baseUrl = result.getOriginalUrl(); String cd = result.getCaptureTimestamp(); @@ -76,7 +92,8 @@ output.put(key, uriConverter.makeReplayURI(cd,u)); - } else if(keyUp.startsWith(HTTP_CONTENT_TYPE_HEADER_UP)) { +// } else if(keyUp.startsWith(HTTP_CONTENT_TYPE_HEADER_UP)) { + } else if(passThroughHeaders.contains(keyUp)) { // let's leave this one as-is: output.put(key,value); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/XArchiveHttpHeaderProcessor.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/XArchiveHttpHeaderProcessor.java 2011-02-06 14:46:02 UTC (rev 3403) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/XArchiveHttpHeaderProcessor.java 2011-02-06 14:48:05 UTC (rev 3404) @@ -19,7 +19,10 @@ */ package org.archive.wayback.replay; +import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.CaptureSearchResult; @@ -28,7 +31,14 @@ private static String DEFAULT_PREFIX = "X-Wayback-Orig-"; private String prefix = DEFAULT_PREFIX; + private Set<String> passThrough = null; + public XArchiveHttpHeaderProcessor() { + passThrough = new HashSet<String>(); + passThrough.add(HTTP_CONTENT_TYPE_HEADER_UP); + passThrough.add(HTTP_CONTENT_DISP_HEADER_UP); + } + public String getPrefix() { return prefix; } @@ -42,7 +52,8 @@ String keyUp = key.toUpperCase(); output.put(prefix + key,value); - if (keyUp.startsWith(HTTP_CONTENT_TYPE_HEADER_UP)) { + if (passThrough.contains(keyUp)) { +// if (keyUp.startsWith(HTTP_CONTENT_TYPE_HEADER_UP)) { // add this one as-is, too. output.put(key, value); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3403 http://archive-access.svn.sourceforge.net/archive-access/?rev=3403&view=rev Author: bradtofel Date: 2011-02-06 14:46:02 +0000 (Sun, 06 Feb 2011) Log Message: ----------- FEATURE: now allows rewriting of javascript: URLs Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2011-02-06 14:45:17 UTC (rev 3402) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2011-02-06 14:46:02 UTC (rev 3403) @@ -28,6 +28,7 @@ */ public class URLStringTransformer implements StringTransformer { private String flags; + private JSStringTransformer jsTransformer = null; /** Default constructor */ public URLStringTransformer() {} /** @@ -39,6 +40,17 @@ } public String transform(ReplayParseContext context, String url) { + if(url.startsWith(ReplayParseContext.JAVASCRIPT_PREFIX)) { + if(jsTransformer == null) { + return url; + } + StringBuilder sb = new StringBuilder(url.length()); + sb.append(ReplayParseContext.JAVASCRIPT_PREFIX); + String jsFragment = url.substring( + ReplayParseContext.JAVASCRIPT_PREFIX.length()); + sb.append(jsTransformer.transform(context, jsFragment)); + return sb.toString(); + } return context.contextualizeUrl(url, flags); } @@ -51,4 +63,12 @@ public void setFlags(String flags) { this.flags = flags; } + + public JSStringTransformer getJsTransformer() { + return jsTransformer; + } + public void setJsTransformer(JSStringTransformer jsTransformer) { + this.jsTransformer = jsTransformer; + } + } This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3402 http://archive-access.svn.sourceforge.net/archive-access/?rev=3402&view=rev Author: bradtofel Date: 2011-02-06 14:45:17 +0000 (Sun, 06 Feb 2011) Log Message: ----------- No longer rewriting data: urls Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java 2011-02-06 14:42:47 UTC (rev 3401) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java 2011-02-06 14:45:17 UTC (rev 3402) @@ -30,7 +30,10 @@ public class ReplayParseContext extends ParseContext { private static final String MAILTO_PREFIX = "mailto:"; - private static final String JAVASCRIPT_PREFIX = "javascript:"; + public static final String JAVASCRIPT_PREFIX = "javascript:"; + public static final String DATA_PREFIX = "data:"; + + private ContextResultURIConverterFactory uriConverterFactory = null; private String datespec = null; private JSPExecutor jspExec = null; @@ -96,6 +99,9 @@ if(url.startsWith(JAVASCRIPT_PREFIX) || url.startsWith(MAILTO_PREFIX)) { return url; } + if(url.startsWith(DATA_PREFIX) || url.startsWith(MAILTO_PREFIX)) { + return url; + } url = super.contextualizeUrl(url); if(flags == null) { flags = ""; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3401 http://archive-access.svn.sourceforge.net/archive-access/?rev=3401&view=rev Author: bradtofel Date: 2011-02-06 14:42:47 +0000 (Sun, 06 Feb 2011) Log Message: ----------- BUGFIX: moved AccessPoint query filters closer to front - too brittle this way.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-02-06 14:41:45 UTC (rev 3400) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-02-06 14:42:47 UTC (rev 3401) @@ -117,9 +117,9 @@ canonicalizer = new AggressiveUrlCanonicalizer(); fgFactories = new ArrayList<FilterGroupFactory>(); fgFactories.add(new CoreCaptureFilterGroupFactory()); + fgFactories.add(new AccessPointCaptureFilterGroupFactory()); + fgFactories.add(new ExclusionCaptureFilterGroupFactory()); fgFactories.add(new QueryCaptureFilterGroupFactory()); - fgFactories.add(new ExclusionCaptureFilterGroupFactory()); - fgFactories.add(new AccessPointCaptureFilterGroupFactory()); } private void cleanupIterator(CloseableIterator<? extends SearchResult> itr) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3400 http://archive-access.svn.sourceforge.net/archive-access/?rev=3400&view=rev Author: bradtofel Date: 2011-02-06 14:41:45 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Uses AccessPoint.embargoMS to block results Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/AccessPointCaptureFilterGroup.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/AccessPointCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/AccessPointCaptureFilterGroup.java 2011-02-06 14:41:24 UTC (rev 3399) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/AccessPointCaptureFilterGroup.java 2011-02-06 14:41:45 UTC (rev 3400) @@ -27,31 +27,47 @@ import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.resourceindex.filters.DateEmbargoFilter; import org.archive.wayback.resourceindex.filters.FilePrefixFilter; import org.archive.wayback.resourceindex.filters.FileRegexFilter; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; +import org.archive.wayback.webapp.AccessPoint; public class AccessPointCaptureFilterGroup implements CaptureFilterGroup { private ObjectFilterChain<CaptureSearchResult> chain = null; - private final static String[] sA = null; + private final static String[] sA = new String[0]; public AccessPointCaptureFilterGroup(WaybackRequest request) { chain = new ObjectFilterChain<CaptureSearchResult>(); + AccessPoint accessPoint = request.getAccessPoint(); List<String> prefixes = null; 
if(request.getAccessPoint() != null) { - prefixes = request.getAccessPoint().getFilePrefixes(); + prefixes = accessPoint.getFileIncludePrefixes(); if(prefixes != null && prefixes.size() > 0) { FilePrefixFilter f = new FilePrefixFilter(); f.setPrefixes(prefixes.toArray(sA)); chain.addFilter(f); } - List<String> patterns = request.getAccessPoint().getFilePatterns(); + prefixes = accessPoint.getFileExcludePrefixes(); + if(prefixes != null && prefixes.size() > 0) { + FilePrefixFilter f = new FilePrefixFilter(); + f.setIncludeMatches(false); + f.setPrefixes(prefixes.toArray(sA)); + chain.addFilter(f); + } + + + List<String> patterns = accessPoint.getFilePatterns(); if(patterns != null && patterns.size() > 0) { FileRegexFilter f = new FileRegexFilter(); f.setPatterns(patterns); chain.addFilter(f); } + long embargoMS = accessPoint.getEmbargoMS(); + if(embargoMS > 0) { + chain.addFilter(new DateEmbargoFilter(embargoMS)); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3399 http://archive-access.svn.sourceforge.net/archive-access/?rev=3399&view=rev Author: bradtofel Date: 2011-02-06 14:41:24 +0000 (Sun, 06 Feb 2011) Log Message: ----------- initial rev - blocks records whose capture date is too recent Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateEmbargoFilter.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateEmbargoFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateEmbargoFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateEmbargoFilter.java 2011-02-06 14:41:24 UTC (rev 3399) @@ -0,0 +1,17 @@ +package org.archive.wayback.resourceindex.filters; + +import java.util.Date; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.util.ObjectFilter; + +public class DateEmbargoFilter implements ObjectFilter<CaptureSearchResult> { + protected Date embargoDate = null; + public DateEmbargoFilter(long minAge) { + embargoDate = new Date(System.currentTimeMillis() - minAge); + } + public int filterObject(CaptureSearchResult o) { + return o.getCaptureDate().compareTo(embargoDate) < 0 + ? FILTER_INCLUDE : FILTER_EXCLUDE; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:40:51
|
Revision: 3398 http://archive-access.svn.sourceforge.net/archive-access/?rev=3398&view=rev Author: bradtofel Date: 2011-02-06 14:40:45 +0000 (Sun, 06 Feb 2011) Log Message: ----------- new constructor without resourceBundle check for NPE changed visibility of getFormat Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java 2011-02-06 14:38:47 UTC (rev 3397) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java 2011-02-06 14:40:45 UTC (rev 3398) @@ -54,13 +54,22 @@ * objects. * @param locale to use, where applicable with MessageFormat objects */ + public StringFormatter(ResourceBundle bundle) { + this(null,Locale.getDefault()); + } + /** + * Construct a StringFormatter... + * @param bundle ResourceBundle to lookup patterns for MessageFormat + * objects. + * @param locale to use, where applicable with MessageFormat objects + */ public StringFormatter(ResourceBundle bundle, Locale locale) { this.bundle = bundle; this.locale = locale; formats = new HashMap<String,MessageFormat>(); } - private MessageFormat getFormat(String pattern) { + public MessageFormat getFormat(String pattern) { MessageFormat format = formats.get(pattern); if(format == null) { format = new MessageFormat(pattern,locale); @@ -88,11 +97,18 @@ * something goes wrong... 
*/ public String getLocalized(String key) { - try { - return bundle.getString(key); - } catch (Exception e) { - return key; + if(bundle != null) { + try { + return bundle.getString(key); + // String localized = bundle.getString(key); + // if((localized != null) && (localized.length() > 0)) { + // return localized; + // } + } catch (Exception e) { + } + } + return key; } private String formatInner(String key, Object objects[]) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3397 http://archive-access.svn.sourceforge.net/archive-access/?rev=3397&view=rev Author: bradtofel Date: 2011-02-06 14:38:47 +0000 (Sun, 06 Feb 2011) Log Message: ----------- FEATURE: added flag to indicate if matches are blocked or included Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/FilePrefixFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/FilePrefixFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/FilePrefixFilter.java 2011-02-06 14:38:07 UTC (rev 3396) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/FilePrefixFilter.java 2011-02-06 14:38:47 UTC (rev 3397) @@ -25,7 +25,9 @@ public class FilePrefixFilter implements ObjectFilter<CaptureSearchResult> { private String prefixes[] = null; + private boolean includeMatches = true; + public String[] getPrefixes() { return prefixes; } @@ -37,9 +39,17 @@ final String file = o.getFile(); for(String prefix : prefixes) { if(file.startsWith(prefix)) { - return FILTER_INCLUDE; + return includeMatches ? FILTER_INCLUDE : FILTER_EXCLUDE; } } - return FILTER_EXCLUDE; + return includeMatches ? FILTER_EXCLUDE : FILTER_INCLUDE; } + + public boolean isIncludeMatches() { + return includeMatches; + } + + public void setIncludeMatches(boolean includeMatches) { + this.includeMatches = includeMatches; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3396 http://archive-access.svn.sourceforge.net/archive-access/?rev=3396&view=rev Author: bradtofel Date: 2011-02-06 14:38:07 +0000 (Sun, 06 Feb 2011) Log Message: ----------- LOGGING: replaced stacktrace with log message Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2011-02-06 14:37:36 UTC (rev 3395) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2011-02-06 14:38:07 UTC (rev 3396) @@ -19,6 +19,8 @@ */ package org.archive.wayback.resourceindex.filters; +import java.util.logging.Logger; + import org.apache.commons.httpclient.URIException; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; @@ -34,6 +36,8 @@ * @version $Date$, $Revision$ */ public class SelfRedirectFilter implements ObjectFilter<CaptureSearchResult> { + private static final Logger LOGGER = Logger.getLogger(SelfRedirectFilter + .class.getName()); private UrlCanonicalizer canonicalizer = null; public SelfRedirectFilter() { @@ -66,7 +70,9 @@ } } catch (URIException e) { // emit message (is that right?) and continue - e.printStackTrace(); + LOGGER.info("Bad redirectURL:" + redirect + + " urlKey:"+ urlKey + + " date:"+ r.getCaptureTimestamp()); } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3395 http://archive-access.svn.sourceforge.net/archive-access/?rev=3395&view=rev Author: bradtofel Date: 2011-02-06 14:37:36 +0000 (Sun, 06 Feb 2011) Log Message: ----------- LOGGING: added more descriptive message when confronted with badly formatted block location file Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java 2011-02-06 14:36:45 UTC (rev 3394) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java 2011-02-06 14:37:36 UTC (rev 3395) @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.logging.Logger; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.util.CloseableIterator; @@ -32,6 +33,9 @@ * */ public class ZiplinedBlockStringSequence { + private static final Logger LOGGER = Logger.getLogger( + ZiplinedBlockStringSequence.class.getName()); + private FlatFile chunkIndex = null; private HashMap<String,String> chunkMap = null; private int maxBlocks = 10000; @@ -60,6 +64,8 @@ numBlocks++; String parts[] = blockDescriptor.split("\t"); if(parts.length != 4) { + LOGGER.severe("Bad Block descriptor Line(" + + blockDescriptor + " in " + chunkIndex.getPath()); throw new ResourceIndexNotAvailableException("Bad line(" + blockDescriptor + ")"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:36:52
|
Revision: 3394 http://archive-access.svn.sourceforge.net/archive-access/?rev=3394&view=rev Author: bradtofel Date: 2011-02-06 14:36:45 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Now include version info in filedesc and warcinfo records Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java 2011-02-06 14:35:50 UTC (rev 3393) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java 2011-02-06 14:36:45 UTC (rev 3394) @@ -42,7 +42,8 @@ // private static final Logger LOGGER = Logger.getLogger( // ARCRecordToSearchResultAdapter.class.getName()); - + private static final String VERSION = "0.1.0"; + private static final String ARC_FILEDESC_VERSION = "arc/filedesc" + VERSION; private HTTPRecordAnnotater annotater = null; private UrlCanonicalizer canonicalizer = null; @@ -88,10 +89,8 @@ if (uriStr.startsWith(ARCRecord.ARC_MAGIC_NUMBER)) { - // skip filedesc record altogether... - return null; - } - if (uriStr.startsWith(WaybackConstants.DNS_URL_PREFIX)) { + result.setMimeType(ARC_FILEDESC_VERSION); + } else if (uriStr.startsWith(WaybackConstants.DNS_URL_PREFIX)) { // skip URL + HTTP header processing for dns records... 
result.setUrlKey(uriStr); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2011-02-06 14:35:50 UTC (rev 3393) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2011-02-06 14:36:45 UTC (rev 3394) @@ -54,6 +54,10 @@ private static final Logger LOGGER = Logger.getLogger(WARCRecordToSearchResultAdapter.class.getName()); + + private static final String VERSION = "0.1.0"; + private static final String WARC_FILEDESC_VERSION = + "warc/warcinfo" + VERSION; private final static String DEFAULT_VALUE = "-"; private UrlCanonicalizer canonicalizer = null; @@ -126,7 +130,7 @@ } } else if(type.equals(WARCConstants.WARCINFO)) { - result.setMimeType("warc/warcinfo"); + result.setMimeType(WARC_FILEDESC_VERSION); } else { LOGGER.info("Skipping record type : " + type); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:35:56
|
Revision: 3393 http://archive-access.svn.sourceforge.net/archive-access/?rev=3393&view=rev Author: bradtofel Date: 2011-02-06 14:35:50 +0000 (Sun, 06 Feb 2011) Log Message: ----------- LOGGING: replaced stacktrace with log message Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2011-02-06 14:35:02 UTC (rev 3392) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2011-02-06 14:35:50 UTC (rev 3393) @@ -23,12 +23,12 @@ import java.net.URL; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; import org.apache.commons.httpclient.URIException; import org.apache.commons.lang.StringEscapeUtils; import org.archive.net.UURI; import org.archive.net.UURIFactory; - /** * Class which tracks the context and state involved with parsing an HTML * document via SAX events. @@ -44,6 +44,9 @@ */ public class ParseContext { + private static final Logger LOGGER = Logger.getLogger( + ParseContext.class.getName()); + protected UURI baseUrl = null; private boolean inCSS = false; @@ -114,10 +117,10 @@ } try { - return UURIFactory.getInstance(baseUrl, url).toString() + frag; } catch (URIException e) { - e.printStackTrace(); + LOGGER.warning("FAILED RESOLVE: base(" + baseUrl + ") frag(" + url + + ") error(" + e.getMessage() + ")"); } return url; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:35:08
|
Revision: 3392 http://archive-access.svn.sourceforge.net/archive-access/?rev=3392&view=rev Author: bradtofel Date: 2011-02-06 14:35:02 +0000 (Sun, 06 Feb 2011) Log Message: ----------- initial rev Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/liveweb/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/liveweb/URLtoARCCacherTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/liveweb/URLtoARCCacherTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/liveweb/URLtoARCCacherTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/liveweb/URLtoARCCacherTest.java 2011-02-06 14:35:02 UTC (rev 3392) @@ -0,0 +1,45 @@ +package org.archive.wayback.liveweb; + + +import org.apache.commons.httpclient.HostConfiguration; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; +import org.apache.commons.httpclient.methods.GetMethod; +import org.archive.wayback.exception.LiveDocumentNotAvailableException; +import org.archive.wayback.util.ByteOp; + +import junit.framework.TestCase; + +public class URLtoARCCacherTest extends TestCase { + public void testSocketTimeout() throws Exception { + MultiThreadedHttpConnectionManager connectionManager = null; + HostConfiguration hostConfiguration = null; + HttpClient http = null; + connectionManager = new MultiThreadedHttpConnectionManager(); + hostConfiguration = new HostConfiguration(); + http = new HttpClient(connectionManager); + http.setHostConfiguration(hostConfiguration); + HttpMethod method = null; +// String urlString = "http://wayback.archive-it.org:6100/one"; + String urlString = "http://hello.com/one"; + int 
socketTimeoutMS = 10; + int connectTimeoutMS = 100; + connectionManager.getParams().setSoTimeout(socketTimeoutMS); + connectionManager.getParams().setConnectionTimeout(connectTimeoutMS); + try { + method = new GetMethod(urlString); + } catch(IllegalArgumentException e) { + throw new LiveDocumentNotAvailableException("Url:" + urlString + + "does not look like an URL?"); + } + try { + int status = http.executeMethod(method); + System.out.println("Got response code: " + status); + ByteOp.copyStream(method.getResponseBodyAsStream(), System.out); + } catch (Exception e) { + e.printStackTrace(); + } + + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:34:49
|
Revision: 3391 http://archive-access.svn.sourceforge.net/archive-access/?rev=3391&view=rev Author: bradtofel Date: 2011-02-06 14:34:43 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Added fixupHTTPUrlWithOneSlash() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2011-02-06 14:33:44 UTC (rev 3390) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2011-02-06 14:34:43 UTC (rev 3391) @@ -279,6 +279,16 @@ return sb.toString(); } + public static String fixupHTTPUrlWithOneSlash(String orig) { + if(orig.startsWith("http:/") && ! orig.startsWith(HTTP_SCHEME)) { + // very likely the IE "you must have meant 1 slash, not 2 bug: + StringBuilder sb = new StringBuilder(orig.length()+1); + sb.append(HTTP_SCHEME); + return sb.append(orig.substring(6)).toString(); + } + return orig; + } + /** * Attempt to extract the hostname component of an absolute URL argument. 
* @param url the url String from which to extract the hostname Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2011-02-06 14:33:44 UTC (rev 3390) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2011-02-06 14:34:43 UTC (rev 3391) @@ -19,6 +19,9 @@ */ package org.archive.wayback.util.url; +import java.net.MalformedURLException; +import java.net.URL; + import junit.framework.TestCase; /** @@ -29,6 +32,22 @@ */ public class UrlOperationsTest extends TestCase { + public void testOneSlashUrl() throws MalformedURLException { + assertEquals("http://one.com/", + UrlOperations.fixupHTTPUrlWithOneSlash("http://one.com/")); + assertEquals("http://one.com", + UrlOperations.fixupHTTPUrlWithOneSlash("http://one.com")); + assertEquals("http://http://one.com", + UrlOperations.fixupHTTPUrlWithOneSlash("http://http://one.com")); + assertEquals("http://one.com", + UrlOperations.fixupHTTPUrlWithOneSlash("http:/one.com")); + assertEquals("http://one.com/", + UrlOperations.fixupHTTPUrlWithOneSlash("http:/one.com/")); + assertEquals("http://one.com/foo.html", + UrlOperations.fixupHTTPUrlWithOneSlash("http:/one.com/foo.html")); + + } + public void testIsAuthority() { checkAuthority("foo.com",true); checkAuthority("foo.con",false); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3390 http://archive-access.svn.sourceforge.net/archive-access/?rev=3390&view=rev Author: bradtofel Date: 2011-02-06 14:33:44 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Now eating StringOutOfBoundsException in UURIFactory Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-02-06 14:32:33 UTC (rev 3389) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-02-06 14:33:44 UTC (rev 3390) @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -41,6 +42,9 @@ */ public class AggressiveUrlCanonicalizer implements UrlCanonicalizer { + private static final Logger LOGGER = Logger.getLogger( + AggressiveUrlCanonicalizer.class.getName()); + private static final String CDX_PREFIX = " CDX "; /** * Strip leading 'www.' @@ -213,7 +217,16 @@ // as building UURIs is *not* a cheap operation. 
// unescape anything that can be: - UURI tmpURI = UURIFactory.getInstance(searchUrl); + UURI tmpURI = null; + try { + tmpURI = UURIFactory.getInstance(searchUrl); + } catch (StringIndexOutOfBoundsException e) { + LOGGER.warning(e.getMessage() + ": " + searchUrl); + return searchUrl; +// } catch(URIException e) { +// LOGGER.warning(e.getMessage() + ": " + searchUrl); +// return searchUrl; + } tmpURI.setPath(tmpURI.getPath()); // convert to UURI to perform required URI fixup: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:32:39
|
Revision: 3389 http://archive-access.svn.sourceforge.net/archive-access/?rev=3389&view=rev Author: bradtofel Date: 2011-02-06 14:32:33 +0000 (Sun, 06 Feb 2011) Log Message: ----------- added embargo MS field, and now pass extra arguments to interstitial redirect page Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2011-02-06 14:31:46 UTC (rev 3388) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2011-02-06 14:32:33 UTC (rev 3389) @@ -86,6 +86,11 @@ /** argument for Interstitial.jsp seconds to delay */ public final static String INTERSTITIAL_SECONDS = "seconds"; + /** argument for Interstitial.jsp msse for replay date */ + public final static String INTERSTITIAL_DATE = "date"; + /** argument for Interstitial.jsp URL being loaded */ + public final static String INTERSTITIAL_URL = "url"; + private static final Logger LOGGER = Logger.getLogger( AccessPoint.class.getName()); @@ -111,7 +116,8 @@ private Properties configs = null; private List<String> filePatterns = null; - private List<String> filePrefixes = null; + private List<String> fileIncludePrefixes = null; + private List<String> fileExcludePrefixes = null; private WaybackCollection collection = null; private ExceptionRenderer exception = new BaseExceptionRenderer(); @@ -122,6 +128,7 @@ private ExclusionFilterFactory exclusionFactory = null; private BooleanOperator<WaybackRequest> authentication = null; + private long embargoMS = 0; protected boolean dispatchLocal(HttpServletRequest httpRequest, HttpServletResponse httpResponse) @@ -271,7 +278,8 @@ } } - private 
void checkInterstitialRedirect(HttpServletRequest httpRequest) + private void checkInterstitialRedirect(HttpServletRequest httpRequest, + WaybackRequest wbRequest) throws BetterRequestException { if((refererAuth != null) && (refererAuth.length() > 0)) { String referer = httpRequest.getHeader("Referer"); @@ -286,6 +294,16 @@ u.append("?"); u.append(INTERSTITIAL_SECONDS).append("=").append(5); u.append("&"); + u.append(INTERSTITIAL_DATE).append("=").append(wbRequest.getReplayDate().getTime()); + u.append("&"); + u.append(INTERSTITIAL_URL).append("="); + try { + u.append(URLEncoder.encode(wbRequest.getRequestUrl(), "UTF-8")); + } catch (UnsupportedEncodingException e) { + // not gonna happen... + u.append(wbRequest.getRequestUrl()); + } + u.append("&"); u.append(INTERSTITIAL_TARGET).append("="); try { u.append(URLEncoder.encode(sb.toString(), "UTF-8")); @@ -293,6 +311,7 @@ // not gonna happen... u.append(sb.toString()); } + throw new BetterRequestException(u.toString()); } } @@ -304,7 +323,7 @@ Resource resource = null; try { - checkInterstitialRedirect(httpRequest); + checkInterstitialRedirect(httpRequest,wbRequest); PerformanceLogger p = new PerformanceLogger("replay"); SearchResults results = @@ -676,8 +695,8 @@ * @return List of file String prefixes that will be matched when querying * the ResourceIndex */ - public List<String> getFilePrefixes() { - return filePrefixes; + public List<String> getFileIncludePrefixes() { + return fileIncludePrefixes; } /** @@ -685,11 +704,28 @@ * when querying the ResourceIndex - only SearchResults from files * with a prefix matching one of those in this List will be returned. 
*/ - public void setFilePrefixes(List<String> filePrefixes) { - this.filePrefixes = filePrefixes; + public void setFileIncludePrefixes(List<String> fileIncludePrefixes) { + this.fileIncludePrefixes = fileIncludePrefixes; } + /** + * @return List of file String prefixes that will be matched when querying + * the ResourceIndex + */ + public List<String> getFileExcludePrefixes() { + return fileExcludePrefixes; + } + /** + * @param filePrefixes List of String file prefixes that will be matched + * when querying the ResourceIndex - only SearchResults from files + * with a prefix matching one of those in this List will be returned. + */ + public void setFileExcludePrefixes(List<String> fileExcludePrefixes) { + this.fileExcludePrefixes = fileExcludePrefixes; + } + + /** * @return the WaybackCollection used by this AccessPoint @@ -854,4 +890,11 @@ public void setBounceToQueryPrefix(boolean bounceToQueryPrefix) { this.bounceToQueryPrefix = bounceToQueryPrefix; } + + public long getEmbargoMS() { + return embargoMS; + } + public void setEmbargoMS(long ms) { + this.embargoMS = ms; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:31:52
|
Revision: 3388 http://archive-access.svn.sourceforge.net/archive-access/?rev=3388&view=rev Author: bradtofel Date: 2011-02-06 14:31:46 +0000 (Sun, 06 Feb 2011) Log Message: ----------- Now blocks from excludes prior to lookup Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2011-02-06 14:31:04 UTC (rev 3387) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2011-02-06 14:31:46 UTC (rev 3388) @@ -22,6 +22,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.logging.Logger; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; @@ -29,9 +30,11 @@ import org.archive.io.arc.ARCRecord; import org.archive.wayback.accesscontrol.robotstxt.RobotExclusionFilterFactory; +import org.archive.wayback.accesscontrol.staticmap.StaticMapExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AdministrativeAccessControlException; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.RobotAccessControlException; @@ -39,6 +42,7 @@ import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourcestore.resourcefile.ArcResource; +import org.archive.wayback.util.url.UrlOperations; 
import org.archive.wayback.util.webapp.AbstractRequestHandler; /** @@ -49,9 +53,14 @@ * */ public class LiveWebAccessPoint extends AbstractRequestHandler { + private static final Logger LOGGER = Logger.getLogger( + LiveWebAccessPoint.class.getName()); + private AccessPoint inner = null; private LiveWebCache cache = null; private RobotExclusionFilterFactory robotFactory = null; + private StaticMapExclusionFilterFactory adminFactory = null; + private long maxCacheMS = 86400000; public boolean handleRequest(HttpServletRequest httpRequest, @@ -59,7 +68,7 @@ throws ServletException, IOException { String urlString = translateRequestPathQuery(httpRequest); - + urlString = UrlOperations.fixupHTTPUrlWithOneSlash(urlString); boolean handled = true; WaybackRequest wbRequest = new WaybackRequest(); wbRequest.setAccessPoint(inner); @@ -84,6 +93,17 @@ throw new RobotAccessControlException(urlString + "is blocked by robots.txt"); } } + if(adminFactory != null) { + ExclusionFilter f = adminFactory.get(); + if(f == null) { + LOGGER.severe("Unable to get administrative exclusion filter!"); + throw new AdministrativeAccessControlException(urlString + "is blocked."); + } + int ruling = f.filterObject(result); + if(ruling == ExclusionFilter.FILTER_EXCLUDE) { + throw new AdministrativeAccessControlException(urlString + "is blocked."); + } + } // no robots check, or robots.txt says GO: ArcResource r = (ArcResource) cache.getCachedResource(url, maxCacheMS , false); ARCRecord ar = (ARCRecord) r.getArcRecord(); @@ -151,4 +171,12 @@ public void setInner(AccessPoint inner) { this.inner = inner; } + + public StaticMapExclusionFilterFactory getAdminFactory() { + return adminFactory; + } + + public void setAdminFactory(StaticMapExclusionFilterFactory adminFactory) { + this.adminFactory = adminFactory; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3387 http://archive-access.svn.sourceforge.net/archive-access/?rev=3387&view=rev Author: bradtofel Date: 2011-02-06 14:31:04 +0000 (Sun, 06 Feb 2011) Log Message: ----------- added tests for real-world examples Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterTest.java 2011-01-19 03:28:43 UTC (rev 3386) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterTest.java 2011-02-06 14:31:04 UTC (rev 3387) @@ -24,8 +24,10 @@ import java.io.IOException; import java.util.Map; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; import junit.framework.TestCase; @@ -39,6 +41,7 @@ File tmpFile = null; StaticMapExclusionFilterFactory factory = null; + UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); protected void setUp() throws Exception { super.setUp(); @@ -62,6 +65,71 @@ /** * @throws Exception */ + public void testRealWorld() throws Exception { + String bases[] = { "pho-c.co.jp/~clever", + "sf.net/pop/Roger", + "www.eva-stu.vn", + "mins.com.br/", + "24.ne.jp", + "24.ne.jp/~nekko"}; +// setTmpContents(bases); + + + ObjectFilter<CaptureSearchResult> filter = getFilter(bases); + assertFalse("unmassaged",isBlocked(filter,"24.ne.jp.idpnt.com/robots.txt")); + assertTrue("massage",isBlocked(filter,"http://24.ne.jp:80/")); + 
assertTrue("unmassaged",isBlocked(filter,"http://www.pho-c.co.jp/~clever")); + assertTrue("massage",isBlocked(filter,"http://24.ne.jp")); + + + assertTrue("unmassaged",isBlocked(filter,"http://www.pho-c.co.jp/~clever")); + assertTrue("massaged",isBlocked(filter,"http://pho-c.co.jp/~clever")); + assertTrue("trailing-slash",isBlocked(filter,"http://pho-c.co.jp/~clever/")); + assertTrue("subpath",isBlocked(filter,"http://pho-c.co.jp/~clever/foo.txt")); + + assertTrue("full-port",isBlocked(filter,"http://www.mins.com.br:80")); + assertTrue("tail-slash-port",isBlocked(filter,"http://www.mins.com.br:80/")); + assertTrue("full",isBlocked(filter,"http://www.mins.com.br")); + assertTrue("tail-slash",isBlocked(filter,"http://www.mins.com.br/")); + assertTrue("full-massage",isBlocked(filter,"http://mins.com.br")); + assertTrue("tail-slash-massage",isBlocked(filter,"http://mins.com.br/")); + assertTrue("massage",isBlocked(filter,"http://mins.com.br/foo.txt")); + assertTrue("subpath",isBlocked(filter,"http://www13.mins.com.br/~clever/foo.txt")); + + assertTrue("massage",isBlocked(filter,"24.ne.jp")); + assertTrue("full",isBlocked(filter,"http://www.mins.com.br")); + assertTrue("subpath",isBlocked(filter,"www.24.ne.jp")); + assertTrue("tail-slash-massage",isBlocked(filter,"http://mins.com.br/")); + assertTrue("subpath",isBlocked(filter,"http://www.24.ne.jp:80/")); + + + + + assertTrue(isBlocked(filter,"http://sf.net/pop/Roger")); + assertTrue(isBlocked(filter,"http://sf.net/pop/Roger/")); + assertTrue(isBlocked(filter,"http://sf.net/pop/Roger//")); + assertFalse(isBlocked(filter,"http://sf.net/pop/")); + assertTrue(isBlocked(filter,"http://sf.net/pop/Roger/2")); + assertTrue(isBlocked(filter,"http://sf.net/pop/Roger/23")); + assertTrue(isBlocked(filter,"http://www.sf.net/pop/Roger")); + assertTrue(isBlocked(filter,"http://www1.sf.net/pop/Roger")); + assertTrue(isBlocked(filter,"http://www23.sf.net/pop/Roger")); + + assertTrue(isBlocked(filter,"http://www23.eva-stu.vn/")); + 
assertTrue(isBlocked(filter,"http://www23.eva-stu.vn")); + assertTrue(isBlocked(filter,"http://eva-stu.vn")); + assertTrue(isBlocked(filter,"http://www.eva-stu.vn/")); + assertTrue(isBlocked(filter,"http://eva-stu.vn/")); + assertTrue(isBlocked(filter,"http://www.eva-stu.vn/foo.txt")); + assertTrue(isBlocked(filter,"http://www2.eva-stu.vn/foo/bar.txt")); + assertTrue(isBlocked(filter,"http://eva-stu.vn/foo/bar.txt")); + + } + + + /** + * @throws Exception + */ public void testBaseNoPrefix() throws Exception { String bases[] = {"http://www.peagreenboat.com/", "http://peagreenboat.com/"}; @@ -93,7 +161,7 @@ setTmpContents(lines); Map<String,Object> map = factory.loadFile(tmpFile.getAbsolutePath()); - return new StaticMapExclusionFilter(map); + return new StaticMapExclusionFilter(map,canonicalizer); } private void setTmpContents(String[] lines) throws IOException { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3386 http://archive-access.svn.sourceforge.net/archive-access/?rev=3386&view=rev Author: bradtofel Date: 2011-01-19 03:28:43 +0000 (Wed, 19 Jan 2011) Log Message: ----------- BUGFIX: was not grabbing location if there were case-problems Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java 2011-01-08 00:24:13 UTC (rev 3385) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java 2011-01-19 03:28:43 UTC (rev 3386) @@ -42,7 +42,8 @@ private RobotMetaFlags robotFlags; private static final Logger LOGGER = Logger.getLogger(HTTPRecordAnnotater.class.getName()); - + private static final String UPPER_LOCATION = + WaybackConstants.LOCATION_HTTP_HEADER.toUpperCase(); private final static String[] mimes = { "html" }; @@ -86,8 +87,8 @@ if (headers != null) { for (Header httpHeader : headers) { - if (httpHeader.getName().equals( - WaybackConstants.LOCATION_HTTP_HEADER)) { + if (httpHeader.getName().toUpperCase().equals( + UPPER_LOCATION)) { String locationStr = httpHeader.getValue(); // TODO: "Location" is supposed to be absolute: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3385 http://archive-access.svn.sourceforge.net/archive-access/?rev=3385&view=rev Author: bradtofel Date: 2011-01-08 00:24:13 +0000 (Sat, 08 Jan 2011) Log Message: ----------- BUGFIX(unreported) composite now sets props on all children Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2011-01-08 00:23:24 UTC (rev 3384) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2011-01-08 00:24:13 UTC (rev 3385) @@ -38,12 +38,20 @@ */ public abstract class CompositeRequestParser extends BaseRequestParser { private RequestParser[] parsers = null; - + /** * */ public void init() { parsers = getRequestParsers(); + for(RequestParser r : parsers) { + if(r instanceof BaseRequestParser) { + BaseRequestParser br = (BaseRequestParser) r; + br.setMaxRecords(getMaxRecords()); + br.setEarliestTimestamp(getEarliestTimestamp()); + br.setLatestTimestamp(getLatestTimestamp()); + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-01-08 00:23:31
|
Revision: 3384 http://archive-access.svn.sourceforge.net/archive-access/?rev=3384&view=rev Author: bradtofel Date: 2011-01-08 00:23:24 +0000 (Sat, 08 Jan 2011) Log Message: ----------- Created new SpecificCaptureReplayException, which NotAvailable and BadContent now extend, which has common handling of SearchResults to provide try-prev try-next options in error pages Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BadContentException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/SpecificCaptureReplayException.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BadContentException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BadContentException.java 2011-01-06 02:29:54 UTC (rev 3383) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BadContentException.java 2011-01-08 00:23:24 UTC (rev 3384) @@ -27,7 +27,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class BadContentException extends WaybackException { +public class BadContentException extends SpecificCaptureReplayException { /** * */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java 2011-01-06 02:29:54 UTC (rev 3383) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java 2011-01-08 00:23:24 UTC (rev 3384) @@ -31,7 +31,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class ResourceNotAvailableException extends WaybackException { +public class ResourceNotAvailableException extends SpecificCaptureReplayException { /** * */ @@ -64,13 +64,13 @@ public int getStatus() { return HttpServletResponse.SC_SERVICE_UNAVAILABLE; } - /** - * @param results - */ - public void setCaptureSearchResults(CaptureSearchResults results) { - this.results = results; - } - public CaptureSearchResults getCaptureSearchResults() { - return results; - } +// /** +// * @param results +// */ +// public void setCaptureSearchResults(CaptureSearchResults results) { +// this.results = results; +// } +// public CaptureSearchResults getCaptureSearchResults() { +// return results; +// } } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/SpecificCaptureReplayException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/SpecificCaptureReplayException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/SpecificCaptureReplayException.java 2011-01-08 00:23:24 UTC (rev 3384) @@ -0,0 +1,55 @@ +package org.archive.wayback.exception; + +import java.util.Iterator; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; + +public abstract class SpecificCaptureReplayException extends WaybackException { + protected CaptureSearchResults results = null; + protected CaptureSearchResult result = null; 
+ protected CaptureSearchResult previous = null; + protected CaptureSearchResult next = null; + + public SpecificCaptureReplayException(String message) { + super(message); + } + + public SpecificCaptureReplayException(String message, String title, + String details) { + super(message, title, details); + } + + public SpecificCaptureReplayException(String message, String title) { + super(message, title); + } + public void setCaptureContext(CaptureSearchResults results, CaptureSearchResult result) { + + Iterator<CaptureSearchResult> itr = results.iterator(); + previous = null; + next = null; + while(itr.hasNext()) { + CaptureSearchResult cur = itr.next(); + if(cur.isClosest()) { + break; + } + previous = cur; + } + if(itr.hasNext()) { + next = itr.next(); + } + + } + public CaptureSearchResults getCaptureSearchResults() { + return results; + } + public CaptureSearchResult getCaptureSearchResult() { + return result; + } + public CaptureSearchResult getNextResult() { + return next; + } + public CaptureSearchResult getPreviousResult() { + return previous; + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2011-01-06 02:29:54 UTC (rev 3383) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2011-01-08 00:23:24 UTC (rev 3384) @@ -53,6 +53,7 @@ import org.archive.wayback.exception.BetterRequestException; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.exception.SpecificCaptureReplayException; import org.archive.wayback.exception.WaybackException; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import 
org.archive.wayback.util.operator.BooleanOperator; @@ -325,16 +326,21 @@ try { resource = getCollection().getResourceStore().retrieveResource(closest); - } catch (ResourceNotAvailableException rnae) { - rnae.setCaptureSearchResults((CaptureSearchResults)results); - throw rnae; + } catch (SpecificCaptureReplayException scre) { + scre.setCaptureContext(captureResults, closest); + throw scre; } p.retrieved(); ReplayRenderer renderer = getReplay().getRenderer(wbRequest, closest, resource); - renderer.renderResource(httpRequest, httpResponse, wbRequest, - closest, resource, getUriConverter(), captureResults); + try { + renderer.renderResource(httpRequest, httpResponse, wbRequest, + closest, resource, getUriConverter(), captureResults); + } catch (SpecificCaptureReplayException scre) { + scre.setCaptureContext(captureResults, closest); + throw scre; + } p.rendered(); p.write(wbRequest.getReplayTimestamp() + " " + Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2011-01-06 02:29:54 UTC (rev 3383) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2011-01-08 00:23:24 UTC (rev 3384) @@ -1,25 +1,23 @@ -<%@ page language="java" pageEncoding="utf-8" contentType="text/html;charset=utf-8"%> -<%@ page import="java.util.List" %> -<%@ page import="java.util.Date" %> -<%@ page import="java.util.Iterator" %> -<%@ page import="org.archive.wayback.exception.WaybackException" %> -<%@ page import="org.archive.wayback.ResultURIConverter" %> -<%@ page import="org.archive.wayback.exception.ResourceNotInArchiveException"%> -<%@ page import="org.archive.wayback.exception.ResourceNotAvailableException"%> -<%@ page import="org.archive.wayback.core.CaptureSearchResult" %> -<%@ page 
import="org.archive.wayback.core.CaptureSearchResults" %> -<%@ page import="org.archive.wayback.core.UIResults" %> -<%@ page import="org.archive.wayback.core.WaybackRequest" %> -<%@ page import="org.archive.wayback.util.StringFormatter" %> -<%@ page import="org.archive.wayback.util.url.UrlOperations" %> -<%@ page import="org.archive.wayback.partition.PartitionsToGraph" %> - -<%@ page import="org.archive.wayback.util.partition.Partitioner" %> -<%@ page import="org.archive.wayback.util.partition.Partition" %> -<%@ page import="org.archive.wayback.util.partition.PartitionSize" %> -<%@ page import="org.archive.wayback.partition.PartitionPartitionMap" %> -<%@page import="org.archive.wayback.exception.ResourceNotAvailableException"%> -<% +<%@ page language="java" pageEncoding="utf-8" contentType="text/html;charset=utf-8" +%><%@ page import="java.util.List" +%><%@ page import="java.util.Date" +%><%@ page import="java.util.Iterator" +%><%@ page import="org.archive.wayback.exception.WaybackException" +%><%@ page import="org.archive.wayback.ResultURIConverter" +%><%@ page import="org.archive.wayback.exception.ResourceNotInArchiveException" +%><%@ page import="org.archive.wayback.core.CaptureSearchResult" +%><%@ page import="org.archive.wayback.core.CaptureSearchResults" +%><%@ page import="org.archive.wayback.core.UIResults" +%><%@ page import="org.archive.wayback.core.WaybackRequest" +%><%@ page import="org.archive.wayback.util.StringFormatter" +%><%@ page import="org.archive.wayback.util.url.UrlOperations" +%><%@ page import="org.archive.wayback.partition.PartitionsToGraph" +%><%@ page import="org.archive.wayback.util.partition.Partitioner" +%><%@ page import="org.archive.wayback.util.partition.Partition" +%><%@ page import="org.archive.wayback.util.partition.PartitionSize" +%><%@ page import="org.archive.wayback.partition.PartitionPartitionMap" +%><%@page import="org.archive.wayback.exception.SpecificCaptureReplayException" +%><% UIResults results = 
UIResults.extractException(request); WaybackException e = results.getException(); WaybackRequest wbr = results.getWbRequest(); @@ -73,48 +71,37 @@ <p>Try Searching all pages under <a href="<%= escapedLink %>"><%= escapedParentUrl %></a></p> <% } -} else if(e instanceof ResourceNotAvailableException) { -%> - <div class="wm-nav-link-div"> - <% - ResourceNotAvailableException rnae = (ResourceNotAvailableException) e; - - CaptureSearchResults cResults = rnae.getCaptureSearchResults(); - Iterator<CaptureSearchResult> itr = cResults.iterator(); - CaptureSearchResult prev = null; - CaptureSearchResult next = null; - while(itr.hasNext()) { - CaptureSearchResult cur = itr.next(); - if(cur.isClosest()) { - break; - } - prev = cur; - } - if(itr.hasNext()) { - next = itr.next(); - } - if((prev != null) || (next != null)) { - String dateFormat = "{0,date,MMMM dd, yyyy HH:mm:ss}"; - ResultURIConverter conv = wbr.getAccessPoint().getUriConverter(); - %> - <div>Or try another close version:</div> - <% - if(prev != null) { - String safePrevReplay = fmt.escapeHtml(conv.makeReplayURI(prev.getCaptureTimestamp(),prev.getOriginalUrl())); - %> - <div>Previous:<a href="<%= safePrevReplay %>"><%= fmt.format(dateFormat,prev.getCaptureDate())%></a></div> - <% - } - if(next != null) { - String safeNextReplay = fmt.escapeHtml(conv.makeReplayURI(next.getCaptureTimestamp(),next.getOriginalUrl())); - %> - <div>Next:<a href="<%= safeNextReplay %>"><%= fmt.format(dateFormat,next.getCaptureDate())%></a></div> - <% - } - } +} else if(e instanceof SpecificCaptureReplayException) { %> - </div> -<% + <div class="wm-nav-link-div"> + <% + SpecificCaptureReplayException scre = (SpecificCaptureReplayException) e; + + CaptureSearchResult prev = scre.getPreviousResult(); + CaptureSearchResult next = scre.getNextResult(); + String dateFormat = "{0,date,MMMM dd, yyyy HH:mm:ss}"; + ResultURIConverter conv = wbr.getAccessPoint().getUriConverter(); + if((prev != null) && (next != null)) { + String safePrevReplay = 
fmt.escapeHtml(conv.makeReplayURI(prev.getCaptureTimestamp(),prev.getOriginalUrl())); + String safeNextReplay = fmt.escapeHtml(conv.makeReplayURI(next.getCaptureTimestamp(),next.getOriginalUrl())); + %> + Would you like to try the <a href="<%= safePrevReplay %>">previous</a> or <a href="<%= safeNextReplay %>">next</a> date? + <% + } else if (prev != null) { + String safePrevReplay = fmt.escapeHtml(conv.makeReplayURI(prev.getCaptureTimestamp(),prev.getOriginalUrl())); + %> + Would you like to try the <a href="<%= safePrevReplay %>">previous</a> date? + <% + + } else if (next != null) { + String safeNextReplay = fmt.escapeHtml(conv.makeReplayURI(next.getCaptureTimestamp(),next.getOriginalUrl())); + %> + Would you like to try the <a href="<%= safeNextReplay %>">next</a> date? + <% + } + %> + </div> + <% } %> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3383 http://archive-access.svn.sourceforge.net/archive-access/?rev=3383&view=rev Author: bradtofel Date: 2011-01-06 02:29:54 +0000 (Thu, 06 Jan 2011) Log Message: ----------- LOG: Abbreviated url canonicalization warnings to 100 chars Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2011-01-06 02:28:14 UTC (rev 3382) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2011-01-06 02:29:54 UTC (rev 3383) @@ -182,7 +182,11 @@ String urlKey = canonicalizer.urlStringToKey(origUrl); result.setUrlKey(urlKey); } catch (URIException e) { - LOGGER.warning("FAILED canonicalize(" + origUrl + "):" + + String shortUrl = + (origUrl.length() < 100) + ? origUrl + :origUrl.substring(0,100); + LOGGER.warning("FAILED canonicalize(" + shortUrl + "):" + file + " " + offset); result.setUrlKey(origUrl); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-01-06 02:28:20
|
Revision: 3382 http://archive-access.svn.sourceforge.net/archive-access/?rev=3382&view=rev Author: bradtofel Date: 2011-01-06 02:28:14 +0000 (Thu, 06 Jan 2011) Log Message: ----------- Added AccessPoint naming and Url prefix page to main navs Modified Paths: -------------- trunk/archive-access/projects/wayback/src/site/site.xml Modified: trunk/archive-access/projects/wayback/src/site/site.xml =================================================================== --- trunk/archive-access/projects/wayback/src/site/site.xml 2011-01-04 00:01:46 UTC (rev 3381) +++ trunk/archive-access/projects/wayback/src/site/site.xml 2011-01-06 02:28:14 UTC (rev 3382) @@ -23,6 +23,7 @@ <item name="Requirements" href="requirements.html"/> <item name="Downloads" href="downloads.html"/> <item name="Administrator Manual" href="administrator_manual.html"/> + <item name="URL Prefixes and AccessPoint names" href="access_point_naming.html"/> <item name="Hadoop CDX Generation" href="hadoop.html"/> <item name="Release Notes" href="release_notes.html"/> <item name="FAQ" href="/faq.html"/> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-01-04 00:01:53
|
Revision: 3381 http://archive-access.svn.sourceforge.net/archive-access/?rev=3381&view=rev Author: bradtofel Date: 2011-01-04 00:01:46 +0000 (Tue, 04 Jan 2011) Log Message: ----------- Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-hadoop-java/.settings/ Property Changed: ---------------- trunk/archive-access/projects/wayback/ trunk/archive-access/projects/wayback/.settings/ trunk/archive-access/projects/wayback/wayback-core/ trunk/archive-access/projects/wayback/wayback-hadoop/ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/ Property changes on: trunk/archive-access/projects/wayback ___________________________________________________________________ Modified: svn:ignore - work target + work target bin .deployment Property changes on: trunk/archive-access/projects/wayback/.settings ___________________________________________________________________ Added: svn:ignore + org.eclipse.jdt.ui.prefs Property changes on: trunk/archive-access/projects/wayback/wayback-core ___________________________________________________________________ Modified: svn:ignore - target + target .settings Property changes on: trunk/archive-access/projects/wayback/wayback-hadoop ___________________________________________________________________ Modified: svn:ignore - target + target .settings Property changes on: trunk/archive-access/projects/wayback/wayback-hadoop-java/.settings ___________________________________________________________________ Added: svn:ignore + org.eclipse.jdt.core.prefs org.maven.ide.eclipse.prefs Property changes on: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF ___________________________________________________________________ Modified: svn:ignore - classes lib + classes lib GlobalCollection.xml This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |