From: <bra...@us...> - 2010-05-18 23:24:51
|
Revision: 3112 http://archive-access.svn.sourceforge.net/archive-access/?rev=3112&view=rev Author: bradtofel Date: 2010-05-18 23:24:45 +0000 (Tue, 18 May 2010) Log Message: ----------- REFACTOR: AccessPoint fields: contextPrefix and serverPrefix are now replaced with staticPrefix, replayPrefix, and queryPrefix. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2010-05-18 23:19:04 UTC (rev 3111) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2010-05-18 23:24:45 UTC (rev 3112) @@ -224,7 +224,7 @@ } /** - * @return the original URL as recieved by Wayback, before forwarding to + * @return the original URL as received by Wayback, before forwarding to * a .jsp */ public String getOriginalRequestURL() { @@ -246,7 +246,7 @@ newWBR.setCaptureQueryRequest(); newWBR.setRequestUrl(url); - return newWBR.getContextPrefix() + "query?" + + return newWBR.getAccessPoint().getQueryPrefix() + "query?" + newWBR.getQueryArguments(1); } @@ -291,10 +291,46 @@ */ public String urlForPage(int pageNum) { WaybackRequest wbRequest = getWbRequest(); - return wbRequest.getContextPrefix() + "query?" + + return wbRequest.getAccessPoint().getQueryPrefix() + "query?" 
+ wbRequest.getQueryArguments(pageNum); } + /** + * @return the defined staticPrefix for the AccessPoint + */ + public String getStaticPrefix() { + if(wbRequest != null) { + if(wbRequest.getAccessPoint() != null) { + return wbRequest.getAccessPoint().getStaticPrefix(); + } + } + return "/"; + } + + /** + * @return the defined queryPrefix for the AccessPoint + */ + public String getQueryPrefix() { + if(wbRequest != null) { + if(wbRequest.getAccessPoint() != null) { + return wbRequest.getAccessPoint().getQueryPrefix(); + } + } + return "/"; + } + + /** + * @return the defined replayPrefix for the AccessPoint + */ + public String getReplayPrefix() { + if(wbRequest != null) { + if(wbRequest.getAccessPoint() != null) { + return wbRequest.getAccessPoint().getReplayPrefix(); + } + } + return "/"; + } + /* * FORWARD TO A .JSP */ @@ -475,7 +511,6 @@ } - /* * STATIC CONVENIENCE METHODS */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-05-18 23:19:04 UTC (rev 3111) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-05-18 23:24:45 UTC (rev 3112) @@ -37,8 +37,6 @@ import org.archive.wayback.requestparser.OpenSearchRequestParser; import org.archive.wayback.resourceindex.filters.ExclusionFilter; -import org.archive.wayback.resourceindex.filters.HostMatchFilter; -import org.archive.wayback.resourceindex.filters.SchemeMatchFilter; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; import org.archive.wayback.util.StringFormatter; @@ -441,6 +439,8 @@ /** * @param prefix + * @deprecated use getAccessPoint.getStaticPrefix() or + * getAccessPoint.getReplayPrefix() */ public void setContextPrefix(String prefix) { 
contextPrefix = prefix; @@ -452,16 +452,18 @@ * * @return String absolute URL pointing to the Context root where the * request was received. + * @deprecated use AccessPoint.setReplayPrefix or setQueryPrefix */ public String getContextPrefix() { - if(contextPrefix == null) { + if(accessPoint == null) { return ""; } - return contextPrefix; + return accessPoint.getQueryPrefix(); } /** * @param prefix + * @deprecated use AccessPoint.set*Prefix */ public void setServerPrefix(String prefix) { serverPrefix = prefix; @@ -471,13 +473,15 @@ * @param prefix * @return an absolute String URL that will point to the root of the * server that is handling the request. + * @deprecated use AccessPoint.get*Prefix */ public String getServerPrefix() { - if(serverPrefix == null) { + if(accessPoint == null) { return ""; } - return serverPrefix; + return accessPoint.getQueryPrefix(); } + /** * @return the accessPoint */ @@ -500,25 +504,6 @@ this.exclusionFilter = exclusionFilter; } - @Deprecated - public ObjectFilter<CaptureSearchResult> getResultFilters() { - ObjectFilterChain<CaptureSearchResult> tmpFilters = - new ObjectFilterChain<CaptureSearchResult>(); - if(isExactHost()) { - tmpFilters.addFilter(new HostMatchFilter( - UrlOperations.urlToHost(getRequestUrl()))); - } - - if(isExactScheme()) { - tmpFilters.addFilter(new SchemeMatchFilter( - UrlOperations.urlToScheme(getRequestUrl()))); - } - if(resultFilters != null) { - tmpFilters.addFilters(resultFilters.getFilters()); - } - return tmpFilters; - } - public void setResultFilters(ObjectFilterChain<CaptureSearchResult> resultFilters) { this.resultFilters = resultFilters; } @@ -1008,6 +993,7 @@ wbRequest.serverPrefix = serverPrefix; wbRequest.formatter = formatter; + wbRequest.accessPoint = accessPoint; wbRequest.filters = new HashMap<String,String>(); Iterator<String> itr = filters.keySet().iterator(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-05-18 23:19:04 UTC (rev 3111) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-05-18 23:24:45 UTC (rev 3112) @@ -24,7 +24,10 @@ */ package org.archive.wayback.webapp; +import java.io.File; import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; import java.util.List; import java.util.Locale; import java.util.Properties; @@ -79,18 +82,31 @@ */ public class AccessPoint extends AbstractRequestHandler implements ShutdownListener { + /** webapp relative location of Interstitial.jsp */ + public final static String INTERSTITIAL_JSP = "jsp/Interstitial.jsp"; + /** argument for Interstitial.jsp target URL */ + public final static String INTERSTITIAL_TARGET = "target"; + /** argument for Interstitial.jsp seconds to delay */ + public final static String INTERSTITIAL_SECONDS = "seconds"; private static final Logger LOGGER = Logger.getLogger( AccessPoint.class.getName()); - + private boolean exactHostMatch = false; private boolean exactSchemeMatch = true; private boolean useAnchorWindow = false; private boolean useServerName = false; + private boolean serveStatic = true; + private boolean bounceToReplayPrefix = false; + private boolean bounceToQueryPrefix = false; private String liveWebPrefix = null; - private String urlRoot = null; + private String staticPrefix = null; + private String queryPrefix = null; + private String replayPrefix = null; + private String refererAuth = null; + private Locale locale = null; private Properties configs = null; @@ -112,20 +128,30 @@ protected boolean dispatchLocal(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { - - String translated = "/" + translateRequestPathQuery(httpRequest); - - 
WaybackRequest wbRequest = new WaybackRequest(); - wbRequest.setContextPrefix(getUrlRoot()); - wbRequest.setAccessPoint(this); - wbRequest.fixup(httpRequest); - UIResults uiResults = new UIResults(wbRequest,uriConverter); - try { - uiResults.forward(httpRequest, httpResponse, translated); - return true; - } catch(IOException e) { - // TODO: figure out if we got IO because of a missing dispatcher + if(!serveStatic) { + return false; } +// String contextRelativePath = httpRequest.getServletPath(); + String translated = "/" + translateRequestPath(httpRequest); +// String absPath = getServletContext().getRealPath(contextRelativePath); + String absPath = getServletContext().getRealPath(translated); + File test = new File(absPath); + if(test.exists()) { + + String translated2 = "/" + translateRequestPathQuery(httpRequest); + + WaybackRequest wbRequest = new WaybackRequest(); +// wbRequest.setContextPrefix(getUrlRoot()); + wbRequest.setAccessPoint(this); + wbRequest.fixup(httpRequest); + UIResults uiResults = new UIResults(wbRequest,uriConverter); + try { + uiResults.forward(httpRequest, httpResponse, translated2); + return true; + } catch(IOException e) { + // TODO: figure out if we got IO because of a missing dispatcher + } + } return false; } @@ -152,7 +178,7 @@ // TODO: refactor this code into RequestParser implementations wbRequest.setAccessPoint(this); // wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); - wbRequest.setContextPrefix(getUrlRoot()); +// wbRequest.setContextPrefix(getUrlRoot()); wbRequest.fixup(httpRequest); // end of refactor @@ -179,11 +205,26 @@ wbRequest.setExactScheme(isExactSchemeMatch()); if(wbRequest.isReplayRequest()) { - + if(bounceToReplayPrefix) { + // we don't accept replay requests on this AccessPoint + // bounce the user to the right place: + String suffix = translateRequestPathQuery(httpRequest); + String replayUrl = replayPrefix + suffix; + httpResponse.sendRedirect(replayUrl); + return true; + } 
handleReplay(wbRequest,httpRequest,httpResponse); } else { + if(bounceToQueryPrefix) { + // we don't accept replay requests on this AccessPoint + // bounce the user to the right place: + String suffix = translateRequestPathQuery(httpRequest); + String replayUrl = queryPrefix + suffix; + httpResponse.sendRedirect(replayUrl); + return true; + } wbRequest.setExactHost(isExactHostMatch()); handleQuery(wbRequest,httpRequest,httpResponse); } @@ -198,7 +239,9 @@ } catch(WaybackException e) { boolean drawError = true; if(e instanceof ResourceNotInArchiveException) { - if(getLiveWebPrefix() != null) { + if((getLiveWebPrefix() != null) + && (getLiveWebPrefix().length() > 0)) { + String liveUrl = getLiveWebPrefix() + wbRequest.getRequestUrl(); httpResponse.sendRedirect(liveUrl); @@ -220,18 +263,48 @@ String url = r.getRequestUrl(); StringBuilder sb = new StringBuilder(100); sb.append("NotInArchive\t"); - sb.append(getUrlRoot()).append("\t"); + sb.append(getBeanName()).append("\t"); sb.append(url); LOGGER.info(sb.toString()); } } + private void checkInterstitialRedirect(HttpServletRequest httpRequest) + throws BetterRequestException { + if(refererAuth != null) { + String referer = httpRequest.getHeader("Referer"); + if((referer == null) || (!referer.contains(refererAuth))) { + StringBuffer sb = httpRequest.getRequestURL(); + if(httpRequest.getQueryString() != null) { + sb.append("?").append(httpRequest.getQueryString()); + } + StringBuilder u = new StringBuilder(); + u.append(getQueryPrefix()); + u.append(INTERSTITIAL_JSP); + u.append("?"); + u.append(INTERSTITIAL_SECONDS).append("=").append(5); + u.append("&"); + u.append(INTERSTITIAL_TARGET).append("="); + try { + u.append(URLEncoder.encode(sb.toString(), "UTF-8")); + } catch (UnsupportedEncodingException e) { + // not gonna happen... 
+ u.append(sb.toString()); + } + throw new BetterRequestException(u.toString()); + } + } + } + private void handleReplay(WaybackRequest wbRequest, HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws IOException, ServletException, WaybackException { Resource resource = null; try { + + checkInterstitialRedirect(httpRequest); + PerformanceLogger p = new PerformanceLogger("replay"); SearchResults results = getCollection().getResourceIndex().query(wbRequest); @@ -241,7 +314,7 @@ } CaptureSearchResults captureResults = (CaptureSearchResults) results; - + // TODO: check which versions are actually accessible right now? CaptureSearchResult closest = captureResults.getClosest(wbRequest, isUseAnchorWindow()); @@ -308,6 +381,16 @@ } } + private String getBestPrefix(String best, String next, String last) { + if(best != null) { + return best; + } + if(next != null) { + return next; + } + return last; + } + /* * ******************************************************************* * ******************************************************************* @@ -369,7 +452,7 @@ /** * @return the useServerName - * @deprecated no longer used, use urlPrefix + * @deprecated no longer used, use {replay,query,static}Prefix */ public boolean isUseServerName() { return useServerName; @@ -377,13 +460,28 @@ /** * @param useServerName the useServerName to set - * @deprecated no longer used, use urlPrefix + * @deprecated no longer used, use {replay,query,static}Prefix */ public void setUseServerName(boolean useServerName) { this.useServerName = useServerName; } /** + * @return true if this AccessPoint serves static content + */ + public boolean isServeStatic() { + return serveStatic; + } + + /** + * @param serveStatic if set to true, this AccessPoint will serve static + * content, and .jsp files + */ + public void setServeStatic(boolean serveStatic) { + this.serveStatic = serveStatic; + } + + /** * @return the liveWebPrefix String to use, or null, if this AccessPoint * does not 
use the Live Web to fill in documents missing from the archive */ @@ -401,21 +499,73 @@ /** * @return the String url prefix to use when generating self referencing - * URLs + * static URLs */ - public String getUrlRoot() { - return urlRoot; + public String getStaticPrefix() { + return getBestPrefix(staticPrefix,queryPrefix,replayPrefix); } /** - * @param urlRoot explicit URL prefix to use when creating self referencing - * URLs + * @param staticPrefix explicit URL prefix to use when creating self referencing + * static URLs */ + public void setStaticPrefix(String staticPrefix) { + this.staticPrefix = staticPrefix; + } + + /** + * @return the String url prefix to use when generating self referencing + * replay URLs + */ + public String getReplayPrefix() { + return getBestPrefix(replayPrefix,queryPrefix,staticPrefix); + } + + /** + * @param replayPrefix explicit URL prefix to use when creating self referencing + * replay URLs + */ + public void setReplayPrefix(String replayPrefix) { + this.replayPrefix = replayPrefix; + } + + /** + * @param queryPrefix explicit URL prefix to use when creating self referencing + * query URLs + */ + public void setQueryPrefix(String queryPrefix) { + this.queryPrefix = queryPrefix; + } + + /** + * @return the String url prefix to use when generating self referencing + * replay URLs + */ + public String getQueryPrefix() { + return getBestPrefix(queryPrefix,staticPrefix,replayPrefix); + } + + /** + * @param urlRoot explicit URL prefix to use when creating ANY self + * referencing URLs + * @deprecated use setQueryPrefix, setReplayPrefix, setStaticPrefix + */ public void setUrlRoot(String urlRoot) { - this.urlRoot = urlRoot; + this.queryPrefix = urlRoot; + this.replayPrefix = urlRoot; + this.staticPrefix = urlRoot; } /** + * @return the String url prefix used when generating self referencing + * URLs + * @deprecated use getQueryPrefix, getReplayPrefix, getStaticPrefix + */ + public String getUrlRoot() { + return 
getBestPrefix(queryPrefix,staticPrefix,replayPrefix); + } + + /** * @return explicit Locale to use within this AccessPoint. */ public Locale getLocale() { @@ -605,4 +755,45 @@ public void setAuthentication(BooleanOperator<WaybackRequest> auth) { this.authentication = auth; } + + /** + * @return the refererAuth + */ + public String getRefererAuth() { + return refererAuth; + } + + /** + * @param refererAuth the refererAuth to set + */ + public void setRefererAuth(String refererAuth) { + this.refererAuth = refererAuth; + } + + /** + * @return the bounceToReplayPrefix + */ + public boolean isBounceToReplayPrefix() { + return bounceToReplayPrefix; + } + + /** + * @param bounceToReplayPrefix the bounceToReplayPrefix to set + */ + public void setBounceToReplayPrefix(boolean bounceToReplayPrefix) { + this.bounceToReplayPrefix = bounceToReplayPrefix; + } + /** + * @return the bounceToQueryPrefix + */ + public boolean isBounceToQueryPrefix() { + return bounceToQueryPrefix; + } + + /** + * @param bounceToQueryPrefix the bounceToQueryPrefix to set + */ + public void setBounceToQueryPrefix(boolean bounceToQueryPrefix) { + this.bounceToQueryPrefix = bounceToQueryPrefix; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-29 00:48:42
|
Revision: 3131 http://archive-access.svn.sourceforge.net/archive-access/?rev=3131&view=rev Author: bradtofel Date: 2010-05-29 00:48:36 +0000 (Sat, 29 May 2010) Log Message: ----------- INTERFACE: reduced specificity from Resource to InputStream, add method to get the full ParseContext metadata Map, added new Interface representing all available parse events. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java 2010-05-28 23:18:42 UTC (rev 3130) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java 2010-05-29 00:48:36 UTC (rev 3131) @@ -25,6 +25,7 @@ package org.archive.wayback.replay.charset; import java.io.IOException; +import java.io.InputStream; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.util.Iterator; @@ -117,7 +118,7 @@ * @return String character set found from META tags in the HTML * @throws IOException */ - protected String getCharsetFromMeta(Resource resource) throws IOException { + protected String getCharsetFromMeta(InputStream resource) throws IOException { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; @@ -143,7 +144,7 @@ * @return String character encoding found, or null if nothing looked good. 
* @throws IOException */ - protected String getCharsetFromBytes(Resource resource) throws IOException { + protected String getCharsetFromBytes(InputStream resource) throws IOException { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-28 23:18:42 UTC (rev 3130) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-29 00:48:36 UTC (rev 3131) @@ -27,6 +27,7 @@ import java.net.URISyntaxException; import java.net.URL; import java.util.HashMap; +import java.util.Map; import org.apache.commons.httpclient.URIException; import org.archive.net.UURI; @@ -77,6 +78,9 @@ public String getData(String key) { return data.get(key); } + public Map<String,String> getMap() { + return data; + } /** * @param url against which relative URLs should be resolved for this parse */ @@ -102,7 +106,8 @@ url = url.substring(0,hashIdx); } try { - return UURIFactory.getInstance(baseUrl, url).toString() + frag; + return baseUrl.resolve(url,false).toString() + frag; +// return UURIFactory.getInstance(baseUrl, url).toString() + frag; } catch (URIException e) { e.printStackTrace(); } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java 2010-05-29 00:48:36 UTC (rev 
3131) @@ -0,0 +1,7 @@ +package org.archive.wayback.util.htmllex.handlers; + +public interface AllEventsHandler extends CloseTagHandler, ContentTextHandler, +CSSTextHandler, JSTextHandler, OpenTagHandler, ParseCompleteHandler, +RemarkTextHandler { + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 23:58:49
|
Revision: 3184 http://archive-access.svn.sourceforge.net/archive-access/?rev=3184&view=rev Author: bradtofel Date: 2010-07-20 23:58:43 +0000 (Tue, 20 Jul 2010) Log Message: ----------- Created RuntimeIOException so ZiplinesChunkIterator can maintain Iterator interface, but throws an exception discernible as an IO problem Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RuntimeIOException.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RuntimeIOException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RuntimeIOException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RuntimeIOException.java 2010-07-20 23:58:43 UTC (rev 3184) @@ -0,0 +1,36 @@ +/* RuntimeIOException + * + * $Id$: + * + * Created on Jun 24, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.exception; + +/** + * @author brad + * + */ +public class RuntimeIOException extends RuntimeException { + + /** */ + private static final long serialVersionUID = -4762025404760379497L; +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/RuntimeIOException.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2010-07-20 23:56:53 UTC (rev 3183) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2010-07-20 23:58:43 UTC (rev 3184) @@ -43,6 +43,7 @@ import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.exception.RuntimeIOException; import org.archive.wayback.resourceindex.adapters.CaptureToUrlSearchResultIterator; import org.archive.wayback.resourceindex.filterfactory.AccessPointCaptureFilterGroupFactory; import org.archive.wayback.resourceindex.filterfactory.CaptureFilterGroup; @@ -173,6 +174,9 @@ WindowFilterGroup<CaptureSearchResult> window = new WindowFilterGroup<CaptureSearchResult>(wbRequest,this); List<CaptureFilterGroup> groups = getRequestFilterGroups(wbRequest); + if(filter != null) { + filters.addFilter(filter); + } 
for(CaptureFilterGroup cfg : groups) { filters.addFilters(cfg.getFilters()); @@ -182,16 +186,17 @@ CloseableIterator<CaptureSearchResult> itr = new ObjectFilterIterator<CaptureSearchResult>( source.getPrefixIterator(urlKey),filters); - - while(itr.hasNext()) { - results.addSearchResult(itr.next()); + try { + while(itr.hasNext()) { + results.addSearchResult(itr.next()); + } + } catch(RuntimeIOException e) { + throw new ResourceIndexNotAvailableException(e.getLocalizedMessage()); } - for(CaptureFilterGroup cfg : groups) { cfg.annotateResults(results); } window.annotateResults(results); - cleanupIterator(itr); return results; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2010-07-20 23:56:53 UTC (rev 3183) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2010-07-20 23:58:43 UTC (rev 3184) @@ -38,6 +38,8 @@ import java.util.zip.GZIPInputStream; import org.apache.log4j.Logger; +import org.archive.wayback.exception.ResourceIndexNotAvailableException; +import org.archive.wayback.exception.RuntimeIOException; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.webapp.AccessPoint; @@ -88,7 +90,7 @@ try { br = blockItr.next().readBlock(); } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeIOException(); } } else { return false; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-08-10 20:03:45
|
Revision: 3221 http://archive-access.svn.sourceforge.net/archive-access/?rev=3221&view=rev Author: bradtofel Date: 2010-08-10 20:03:38 +0000 (Tue, 10 Aug 2010) Log Message: ----------- backing out all wrapper changes Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2010-08-10 19:55:56 UTC (rev 3220) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2010-08-10 20:03:38 UTC (rev 3221) @@ -349,34 +349,6 @@ /** * Store this UIResults object in the given HttpServletRequest, then - * forward the request to wrapper, which *should* subsequently forward the - * request to target. - * - * @param request the HttpServletRequest - * @param response the HttpServletResponse - * @param target the String path to the .jsp to handle drawing the data, - * relative to the contextRoot (ex. "/WEB-INF/query/foo.jsp") - * @param wrapper the wrapper page which should generate the header, - * forward control to 'target' and then generate the footer. - * @throws ServletException for usual reasons... - * @throws IOException for usual reasons... 
- */ - public void forwardWrapped(HttpServletRequest request, - HttpServletResponse response, final String target, String wrapper) - throws ServletException, IOException { - - this.contentJsp = target; - this.originalRequestURL = request.getRequestURL().toString(); - request.setAttribute(FERRET_NAME, this); - RequestDispatcher dispatcher = request.getRequestDispatcher(wrapper); - if(dispatcher == null) { - throw new IOException("No dispatcher for " + target); - } - dispatcher.forward(request, response); - } - - /** - * Store this UIResults object in the given HttpServletRequest, then * forward the request to target, in this case, an image, html file, .jsp, * any file which can return a complete document. Specifically, this means * that if target is a .jsp, it must render it's own header and footer. Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2010-08-10 19:55:56 UTC (rev 3220) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2010-08-10 20:03:38 UTC (rev 3221) @@ -142,13 +142,7 @@ } } if(!handled) { - String wrapperJsp = wbRequest.getAccessPoint().getWrapperJsp(); - if(wrapperJsp != null) { - uiResults.forwardWrapped(httpRequest, httpResponse, - errorJsp, wrapperJsp); - } else { - uiResults.forward(httpRequest, httpResponse, errorJsp); - } + uiResults.forward(httpRequest, httpResponse, errorJsp); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2010-08-10 19:55:56 UTC (rev 
3220) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2010-08-10 20:03:38 UTC (rev 3221) @@ -61,13 +61,7 @@ if(wbRequest.isXMLMode()) { uiResults.forward(httpRequest, httpResponse, xmlCaptureJsp); } else { - String wrapperJsp = wbRequest.getAccessPoint().getWrapperJsp(); - if(wrapperJsp != null) { - uiResults.forwardWrapped(httpRequest, httpResponse, - captureJsp, wrapperJsp); - } else { - uiResults.forward(httpRequest, httpResponse, captureJsp); - } + uiResults.forward(httpRequest, httpResponse, captureJsp); } } @@ -83,14 +77,7 @@ if(wbRequest.isXMLMode()) { uiResults.forward(httpRequest, httpResponse, xmlUrlJsp); } else { - String wrapperJsp = wbRequest.getAccessPoint().getWrapperJsp(); - if(wrapperJsp != null) { - - uiResults.forwardWrapped(httpRequest, httpResponse, - urlJsp,wbRequest.getAccessPoint().getWrapperJsp()); - } else { - uiResults.forward(httpRequest, httpResponse, urlJsp); - } + uiResults.forward(httpRequest, httpResponse, urlJsp); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java 2010-08-10 19:55:56 UTC (rev 3220) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java 2010-08-10 20:03:38 UTC (rev 3221) @@ -49,7 +49,6 @@ */ public class JSPReplayRenderer implements ReplayRenderer { private String targetJsp = null; - private boolean wrap = true; public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, @@ -58,13 +57,8 @@ throws ServletException, IOException, WaybackException { UIResults uiResults = new UIResults(wbRequest, uriConverter, results, result, resource); - if(wrap) { - 
uiResults.forwardWrapped(httpRequest, httpResponse, - targetJsp, wbRequest.getAccessPoint().getWrapperJsp()); - } else { - uiResults.forward(httpRequest, httpResponse, - targetJsp); - } + uiResults.forward(httpRequest, httpResponse, + targetJsp); } /** @@ -82,23 +76,4 @@ public void setTargetJsp(String targetJsp) { this.targetJsp = targetJsp; } - - /** - * @return true if the jsp should be wrapped in the stardard UI template - * wrapper jsp for the AccessPoint. - */ - public boolean isWrap() { - return wrap; - } - - /** - * @param wrap if true then the jsp configured for this page will be - * wrapped in the standard template used for the current AccessPoint, if - * false then the jsp configured is responsible for rendering the entire - * content. - */ - public void setWrap(boolean wrap) { - this.wrap = wrap; - } - } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-08-10 19:55:56 UTC (rev 3220) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-08-10 20:03:38 UTC (rev 3221) @@ -105,8 +105,6 @@ private String queryPrefix = null; private String replayPrefix = null; -// private String wrapperJsp = "/WEB-INF/template/UI-wrapper.jsp"; - private String wrapperJsp = null; private String interstitialJsp = INTERSTITIAL_JSP; private String refererAuth = null; @@ -149,14 +147,7 @@ wbRequest.fixup(httpRequest); UIResults uiResults = new UIResults(wbRequest,uriConverter); try { - if(wrapperJsp != null && - translatedNoQuery.endsWith("-wrap.jsp")) { - - uiResults.forwardWrapped(httpRequest, httpResponse, - translatedQ, wrapperJsp); - } else { - uiResults.forward(httpRequest, httpResponse, translatedQ); - } + uiResults.forward(httpRequest, httpResponse, 
translatedQ); return true; } catch(IOException e) { // TODO: figure out if we got IO because of a missing dispatcher @@ -559,23 +550,8 @@ public String getQueryPrefix() { return getBestPrefix(queryPrefix,staticPrefix,replayPrefix); } - /** - * @return the String url prefix to use when generating self referencing - * replay URLs - */ - public String getWrapperJsp() { - return wrapperJsp; - } /** - * @param wrapperJsp the .jsp file responsible for generating the HTML - * wrapper for content. - */ - public void setWrapperJsp(String wrapperJsp) { - this.wrapperJsp = wrapperJsp; - } - - /** * @param interstitialJsp the interstitialJsp to set */ public void setInterstitialJsp(String interstitialJsp) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-08-16 22:57:56
|
Revision: 3229 http://archive-access.svn.sourceforge.net/archive-access/?rev=3229&view=rev Author: bradtofel Date: 2010-08-16 22:57:47 +0000 (Mon, 16 Aug 2010) Log Message: ----------- REFACTOR: moving Memento stuff into its own package -- it's really a new Replay format, so like archivalurl and proxy mode, should have its own package. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoHTMLReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleParser.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/MementoParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/TimeBundleParser.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoHTMLReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoHTMLReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoHTMLReplayRenderer.java 2010-08-16 22:57:47 UTC (rev 3229) @@ -0,0 +1,97 @@ +/* MementoHTMLReplayRenderer + * + * $Id$ + * + * Created on 2:23:03 PM Jul 18, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback.
+ * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.memento; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.TextDocument; +import org.archive.wayback.replay.TextReplayRenderer; +import org.archive.wayback.replay.HttpHeaderProcessor; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class MementoHTMLReplayRenderer extends TextReplayRenderer { + /** + * @param httpHeaderProcessor + */ + public MementoHTMLReplayRenderer(HttpHeaderProcessor httpHeaderProcessor) { + super(httpHeaderProcessor); + } + + /* (non-Javadoc) + * @see org.archive.wayback.replay.HTMLReplayRenderer#updatePage(org.archive.wayback.replay.HTMLPage, javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.CaptureSearchResult, org.archive.wayback.core.Resource, 
org.archive.wayback.ResultURIConverter, org.archive.wayback.core.CaptureSearchResults) + */ + @Override + protected void updatePage(TextDocument page, HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) + throws ServletException, IOException { + + List<String> jspInserts = getJspInserts(); + + StringBuilder toInsert = new StringBuilder(300); + + /* + * URLs have to resolve as they did originally: + * + * * absolute URLs are fine as-is + * + * * page-relative and server-relative URLs have to resolve against + * the page's original URL, not where ever it is being served from + * now. Hopefully, this can be accomplished by adding a + * <BASE href="XXX"> + * with the original page URL unless the page contains one + * already.. + */ + page.addBase(); + + if(jspInserts != null) { + Iterator<String> itr = jspInserts.iterator(); + while(itr.hasNext()) { + toInsert.append(page.includeJspString(itr.next(), httpRequest, + httpResponse, wbRequest, results, result, resource)); + } + } + + // insert the new content: + page.insertAtEndOfBody(toInsert.toString()); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoHTMLReplayRenderer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoParser.java (from rev 3191, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/MementoParser.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoParser.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoParser.java 2010-08-16 22:57:47 UTC (rev 3229) @@ -0,0 +1,157 @@ +package org.archive.wayback.memento; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +import javax.servlet.http.HttpServletRequest; + +import org.apache.log4j.Logger; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.BetterRequestException; +import org.archive.wayback.requestparser.BaseRequestParser; +import org.archive.wayback.requestparser.WrappedRequestParser; +import org.archive.wayback.webapp.AccessPoint; + +/** + * RequestParser subclass which matches ".../timegate/URL" requests, and parses + * the Accept-Datetime header + * + * @author Lyudmila Balakireva + * + */ +public class MementoParser extends WrappedRequestParser { + private static final Logger LOGGER = + Logger.getLogger(MementoParser.class.getName()); + + String DTHEADER = "Accept-Datetime"; + + List<SimpleDateFormat> dtsupportedformats = + new ArrayList<SimpleDateFormat>(); + + String MEMENTO_BASE = "timegate"; + + /** + * @param wrapped + * BaseRequestParser with configuration + */ + public MementoParser(BaseRequestParser wrapped) { + super(wrapped); + + dtsupportedformats + .add(new SimpleDateFormat("E, dd MMM yyyy HH:mm:ss Z")); + dtsupportedformats.add(new SimpleDateFormat("E, dd MMM yyyy Z")); + dtsupportedformats.add(new SimpleDateFormat("E, dd MMM yyyy")); + } + + @Override + public WaybackRequest parse(HttpServletRequest httpRequest, + AccessPoint accessPoint) throws BadQueryException, + BetterRequestException { + + String base = accessPoint.translateRequestPath(httpRequest); + String requestPath = accessPoint.translateRequestPathQuery(httpRequest); + + LOGGER.trace("requestPath:" + requestPath); + if (base.startsWith(MEMENTO_BASE)) { + + 
// strip leading "timegate/": + String urlStr = base.substring(requestPath.indexOf("/") + 1); + + // get the "Accept-Datetime" header: + String httpdate = getHttpDate(httpRequest); + Date dtconnegdate = null; + if (httpdate != null) { + dtconnegdate = checkDateValidity(httpdate, dtsupportedformats); + if (dtconnegdate == null) { + return null; + } + } else { + // TODO: should this return null her? no header.. + } + + WaybackRequest wbRequest = new WaybackRequest(); + if (wbRequest.getStartTimestamp() == null) { + wbRequest.setStartTimestamp(getEarliestTimestamp()); + } + if (dtconnegdate != null) { + wbRequest.setAnchorDate(dtconnegdate); + } else { + wbRequest.setAnchorTimestamp(getLatestTimestamp()); + } + + wbRequest.put("dtconneg", httpdate); + + if (wbRequest.getEndTimestamp() == null) { + wbRequest.setEndTimestamp(getLatestTimestamp()); + } + wbRequest.setCaptureQueryRequest(); + wbRequest.setRequestUrl(urlStr); + if (wbRequest != null) { + wbRequest.setResultsPerPage(getMaxRecords()); + } + return wbRequest; + } + return null; + } + + /** + * Extract the value of the "Accept-Datetime" HTTP request header, if + * present, and further strips the date value from any surrounding "{","}" + * @param req HttpServletRequest for this request + * @return the raw String containing the date information, or null if no + * such HTTP header exists. + */ + public String getHttpDate(HttpServletRequest req) { + String httpdate = req.getHeader(DTHEADER); + + if (httpdate != null) { + int j = httpdate.indexOf("{", 0); + + if (j >= 0) { + + httpdate = httpdate.substring(httpdate.indexOf("{", 0) + 1); + + } + + if (httpdate.indexOf("}") > 0) { + httpdate = httpdate.substring(0, httpdate.indexOf("}")); + + } + } + return httpdate; + } + + /** + * Attempt to parse the String httpdate argument using one of the + * SimpleDateFormats provided. 
+ * + * @param httpdate + * String version of a Date + * @param list + * of SimpleDateFormats to parse the httpdate + * @return Date object set to the time parsed, or null if not parsed + */ + public Date checkDateValidity(String httpdate, List<SimpleDateFormat> list) { + + Date d = null; + Iterator<SimpleDateFormat> it = list.iterator(); + while (it.hasNext()) { + SimpleDateFormat formatter = it.next(); + try { + + d = formatter.parse(httpdate); + break; + + } catch (Exception e) { + e.printStackTrace(); + } + + } + + return d; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoRequestParser.java 2010-08-16 22:57:47 UTC (rev 3229) @@ -0,0 +1,59 @@ +/* MementoRequestParser + * + * $Id$: + * + * Created on Aug 16, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.memento; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.archivalurl.ArchivalUrlRequestParser; +import org.archive.wayback.archivalurl.requestparser.ArchivalUrlFormRequestParser; +import org.archive.wayback.archivalurl.requestparser.DatelessReplayRequestParser; +import org.archive.wayback.archivalurl.requestparser.PathDatePrefixQueryRequestParser; +import org.archive.wayback.archivalurl.requestparser.PathDateRangeQueryRequestParser; +import org.archive.wayback.archivalurl.requestparser.PathPrefixDatePrefixQueryRequestParser; +import org.archive.wayback.archivalurl.requestparser.PathPrefixDateRangeQueryRequestParser; +import org.archive.wayback.archivalurl.requestparser.ReplayRequestParser; +import org.archive.wayback.requestparser.OpenSearchRequestParser; + +/** + * @author brad + * + */ +public class MementoRequestParser extends ArchivalUrlRequestParser { + protected RequestParser[] getRequestParsers() { + RequestParser[] theParsers = { + new ReplayRequestParser(this), + new MementoParser(this), + new TimeBundleParser(this), + new PathDatePrefixQueryRequestParser(this), + new PathDateRangeQueryRequestParser(this), + new PathPrefixDatePrefixQueryRequestParser(this), + new PathPrefixDateRangeQueryRequestParser(this), + new OpenSearchRequestParser(this), + new ArchivalUrlFormRequestParser(this), + new DatelessReplayRequestParser(this) + }; + return theParsers; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/MementoRequestParser.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Copied: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleParser.java (from rev 3191, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/TimeBundleParser.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleParser.java 2010-08-16 22:57:47 UTC (rev 3229) @@ -0,0 +1,90 @@ +package org.archive.wayback.memento; + +import javax.servlet.http.HttpServletRequest; + +import org.apache.log4j.Logger; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.BetterRequestException; +import org.archive.wayback.requestparser.BaseRequestParser; +import org.archive.wayback.requestparser.WrappedRequestParser; +import org.archive.wayback.webapp.AccessPoint; + +/** + * RequestParser subclass which parses "timebundle/URL" and + * "timemap/FORMAT/URL" requests + * + * @author Lyudmila Balakireva + * + */ +public class TimeBundleParser extends WrappedRequestParser { + private static final Logger LOGGER = + Logger.getLogger(TimeBundleParser.class.getName()); + + String MEMENTO_BASE = "timegate"; + + /** + * @param wrapped BaseRequestParser holding config + */ + public TimeBundleParser(BaseRequestParser wrapped) { + super(wrapped); + } + + @Override + public WaybackRequest parse(HttpServletRequest httpRequest, + AccessPoint accessPoint) throws BadQueryException, + BetterRequestException { + + String requestPath = accessPoint.translateRequestPathQuery(httpRequest); + LOGGER.trace("requestpath:" + requestPath); + + if (requestPath.startsWith("timebundle")) { + + WaybackRequest wbRequest = new WaybackRequest(); + String urlStr = 
requestPath.substring(requestPath.indexOf("/") + 1); + if (wbRequest.getStartTimestamp() == null) { + wbRequest.setStartTimestamp(getEarliestTimestamp()); + } + if (wbRequest.getEndTimestamp() == null) { + wbRequest.setEndTimestamp(getLatestTimestamp()); + } + wbRequest.setCaptureQueryRequest(); + wbRequest.setRequestUrl(urlStr); + + // TODO: is it critical to return a 303 code, or will a 302 do? + // if so, this and ORE.jsp can be simplified by throwing a + // BetterRequestException here. + wbRequest.put("redirect", "true"); + return wbRequest; + } + + if (requestPath.startsWith("timemap")) { + + String urlStrplus = requestPath + .substring(requestPath.indexOf("/") + 1); + String format = urlStrplus.substring(0, urlStrplus.indexOf("/")); + + LOGGER.trace("format:" + format); + String urlStr = urlStrplus.substring(urlStrplus.indexOf("/") + 1); + LOGGER.trace("id:" + urlStr); + WaybackRequest wbRequest = new WaybackRequest(); + if (wbRequest.getStartTimestamp() == null) { + wbRequest.setStartTimestamp(getEarliestTimestamp()); + } + wbRequest.setAnchorTimestamp(getLatestTimestamp()); + wbRequest.put("format", format); + if (wbRequest.getEndTimestamp() == null) { + wbRequest.setEndTimestamp(getLatestTimestamp()); + } + wbRequest.setCaptureQueryRequest(); + wbRequest.setRequestUrl(urlStr); + if (wbRequest != null) { + wbRequest.setResultsPerPage(getMaxRecords()); + } + return wbRequest; + + } + return null; + } + +} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/MementoParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/MementoParser.java 2010-08-10 23:27:47 UTC (rev 3228) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/MementoParser.java 2010-08-16 22:57:47 UTC (rev 3229) @@ -1,155 +0,0 @@ -package 
org.archive.wayback.requestparser; - -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.Iterator; -import java.util.List; - -import javax.servlet.http.HttpServletRequest; - -import org.apache.log4j.Logger; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.BetterRequestException; -import org.archive.wayback.webapp.AccessPoint; - -/** - * RequestParser subclass which matches ".../timegate/URL" requests, and parses - * the Accept-Datetime header - * - * @author Lyudmila Balakireva - * - */ -public class MementoParser extends WrappedRequestParser { - private static final Logger LOGGER = - Logger.getLogger(MementoParser.class.getName()); - - String DTHEADER = "Accept-Datetime"; - - List<SimpleDateFormat> dtsupportedformats = - new ArrayList<SimpleDateFormat>(); - - String MEMENTO_BASE = "timegate"; - - /** - * @param wrapped - * BaseRequestParser with configuration - */ - public MementoParser(BaseRequestParser wrapped) { - super(wrapped); - - dtsupportedformats - .add(new SimpleDateFormat("E, dd MMM yyyy HH:mm:ss Z")); - dtsupportedformats.add(new SimpleDateFormat("E, dd MMM yyyy Z")); - dtsupportedformats.add(new SimpleDateFormat("E, dd MMM yyyy")); - } - - @Override - public WaybackRequest parse(HttpServletRequest httpRequest, - AccessPoint accessPoint) throws BadQueryException, - BetterRequestException { - - String base = accessPoint.translateRequestPath(httpRequest); - String requestPath = accessPoint.translateRequestPathQuery(httpRequest); - - LOGGER.trace("requestPath:" + requestPath); - if (base.startsWith(MEMENTO_BASE)) { - - // strip leading "timegate/": - String urlStr = base.substring(requestPath.indexOf("/") + 1); - - // get the "Accept-Datetime" header: - String httpdate = getHttpDate(httpRequest); - Date dtconnegdate = null; - if (httpdate != null) { - dtconnegdate = checkDateValidity(httpdate, 
dtsupportedformats); - if (dtconnegdate == null) { - return null; - } - } else { - // TODO: should this return null her? no header.. - } - - WaybackRequest wbRequest = new WaybackRequest(); - if (wbRequest.getStartTimestamp() == null) { - wbRequest.setStartTimestamp(getEarliestTimestamp()); - } - if (dtconnegdate != null) { - wbRequest.setAnchorDate(dtconnegdate); - } else { - wbRequest.setAnchorTimestamp(getLatestTimestamp()); - } - - wbRequest.put("dtconneg", httpdate); - - if (wbRequest.getEndTimestamp() == null) { - wbRequest.setEndTimestamp(getLatestTimestamp()); - } - wbRequest.setCaptureQueryRequest(); - wbRequest.setRequestUrl(urlStr); - if (wbRequest != null) { - wbRequest.setResultsPerPage(getMaxRecords()); - } - return wbRequest; - } - return null; - } - - /** - * Extract the value of the "Accept-Datetime" HTTP request header, if - * present, and further strips the date value from any surrounding "{","}" - * @param req HttpServletRequest for this request - * @return the raw String containing the date information, or null if no - * such HTTP header exists. - */ - public String getHttpDate(HttpServletRequest req) { - String httpdate = req.getHeader(DTHEADER); - - if (httpdate != null) { - int j = httpdate.indexOf("{", 0); - - if (j >= 0) { - - httpdate = httpdate.substring(httpdate.indexOf("{", 0) + 1); - - } - - if (httpdate.indexOf("}") > 0) { - httpdate = httpdate.substring(0, httpdate.indexOf("}")); - - } - } - return httpdate; - } - - /** - * Attempt to parse the String httpdate argument using one of the - * SimpleDateFormats provided. 
- * - * @param httpdate - * String version of a Date - * @param list - * of SimpleDateFormats to parse the httpdate - * @return Date object set to the time parsed, or null if not parsed - */ - public Date checkDateValidity(String httpdate, List<SimpleDateFormat> list) { - - Date d = null; - Iterator<SimpleDateFormat> it = list.iterator(); - while (it.hasNext()) { - SimpleDateFormat formatter = it.next(); - try { - - d = formatter.parse(httpdate); - break; - - } catch (Exception e) { - e.printStackTrace(); - } - - } - - return d; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/TimeBundleParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/TimeBundleParser.java 2010-08-10 23:27:47 UTC (rev 3228) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/TimeBundleParser.java 2010-08-16 22:57:47 UTC (rev 3229) @@ -1,88 +0,0 @@ -package org.archive.wayback.requestparser; - -import javax.servlet.http.HttpServletRequest; - -import org.apache.log4j.Logger; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.BetterRequestException; -import org.archive.wayback.webapp.AccessPoint; - -/** - * RequestParser subclass which parses "timebundle/URL" and - * "timemap/FORMAT/URL" requests - * - * @author Lyudmila Balakireva - * - */ -public class TimeBundleParser extends WrappedRequestParser { - private static final Logger LOGGER = - Logger.getLogger(TimeBundleParser.class.getName()); - - String MEMENTO_BASE = "timegate"; - - /** - * @param wrapped BaseRequestParser holding config - */ - public TimeBundleParser(BaseRequestParser wrapped) { - super(wrapped); - } - - @Override - public WaybackRequest parse(HttpServletRequest httpRequest, - AccessPoint 
accessPoint) throws BadQueryException, - BetterRequestException { - - String requestPath = accessPoint.translateRequestPathQuery(httpRequest); - LOGGER.trace("requestpath:" + requestPath); - - if (requestPath.startsWith("timebundle")) { - - WaybackRequest wbRequest = new WaybackRequest(); - String urlStr = requestPath.substring(requestPath.indexOf("/") + 1); - if (wbRequest.getStartTimestamp() == null) { - wbRequest.setStartTimestamp(getEarliestTimestamp()); - } - if (wbRequest.getEndTimestamp() == null) { - wbRequest.setEndTimestamp(getLatestTimestamp()); - } - wbRequest.setCaptureQueryRequest(); - wbRequest.setRequestUrl(urlStr); - - // TODO: is it critical to return a 303 code, or will a 302 do? - // if so, this and ORE.jsp can be simplified by throwing a - // BetterRequestException here. - wbRequest.put("redirect", "true"); - return wbRequest; - } - - if (requestPath.startsWith("timemap")) { - - String urlStrplus = requestPath - .substring(requestPath.indexOf("/") + 1); - String format = urlStrplus.substring(0, urlStrplus.indexOf("/")); - - LOGGER.trace("format:" + format); - String urlStr = urlStrplus.substring(urlStrplus.indexOf("/") + 1); - LOGGER.trace("id:" + urlStr); - WaybackRequest wbRequest = new WaybackRequest(); - if (wbRequest.getStartTimestamp() == null) { - wbRequest.setStartTimestamp(getEarliestTimestamp()); - } - wbRequest.setAnchorTimestamp(getLatestTimestamp()); - wbRequest.put("format", format); - if (wbRequest.getEndTimestamp() == null) { - wbRequest.setEndTimestamp(getLatestTimestamp()); - } - wbRequest.setCaptureQueryRequest(); - wbRequest.setRequestUrl(urlStr); - if (wbRequest != null) { - wbRequest.setResultsPerPage(getMaxRecords()); - } - return wbRequest; - - } - return null; - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-08-27 23:35:22
|
Revision: 3243 http://archive-access.svn.sourceforge.net/archive-access/?rev=3243&view=rev Author: bradtofel Date: 2010-08-27 23:35:14 +0000 (Fri, 27 Aug 2010) Log Message: ----------- LOGGING: changed all log4j references to java.util.logging TWEAK: reorganized many import stanzas Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeGateRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/PortMapper.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/SpringReader.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/StaticFileRequestHandler.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/PerformanceLogger.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/ServerRelativeArchivalRedirect.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -31,7 +31,7 @@ import java.net.URL; import java.net.URLEncoder; -import org.apache.log4j.Logger; +import java.util.logging.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2010-08-26 23:43:06 UTC (rev 
3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -34,7 +34,7 @@ import java.util.LinkedList; import java.util.List; -import org.apache.log4j.Logger; +import java.util.logging.Logger; /** * Class which parses a robots.txt file, storing the rules contained therein, @@ -112,7 +112,7 @@ current = new ArrayList<String>(); } rules.put(ua, current); - LOGGER.trace("Found User-agent(" + ua + ") rules..."); + LOGGER.fine("Found User-agent(" + ua + ") rules..."); continue; } if (read.matches("(?i)Disallow:.*")) { @@ -145,7 +145,7 @@ return false; } else { - LOGGER.trace("UA(" + curUA + ") has (" + LOGGER.fine("UA(" + curUA + ") has (" + disallowedPath + ") blocked...(" + disallowedPath.length() + ")"); if (disallowedPath.equals("/") || path.startsWith(disallowedPath)) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -25,10 +25,9 @@ package org.archive.wayback.accesscontrol.staticmap; import java.util.Map; +import java.util.logging.Logger; - import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; @@ -64,7 +63,7 @@ if(nextSearch == null) { break; } - LOGGER.trace("EXCLUSION-MAP:Checking " + nextSearch); + LOGGER.fine("EXCLUSION-MAP:Checking " + 
nextSearch); if(exclusionMap.containsKey(nextSearch)) { LOGGER.info("EXCLUSION-MAP: EXCLUDED: \"" + nextSearch + "\" (" + url +")"); return true; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -28,9 +28,8 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; - -import org.apache.log4j.Logger; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; @@ -72,7 +71,7 @@ long currentMod = file.lastModified(); if(currentMod == lastUpdated) { if(currentMod == 0) { - LOGGER.error("No exclude file at " + file.getAbsolutePath()); + LOGGER.severe("No exclude file at " + file.getAbsolutePath()); } return; } @@ -85,7 +84,7 @@ lastUpdated = -1; currentMap = null; e.printStackTrace(); - LOGGER.error("Reload " + file.getAbsolutePath() + " FAILED:" + + LOGGER.severe("Reload " + file.getAbsolutePath() + " FAILED:" + e.getLocalizedMessage()); } } @@ -101,7 +100,7 @@ } String surt = line.startsWith("(") ? 
line : SURTTokenizer.prefixKey(line); - LOGGER.trace("EXCLUSION-MAP: adding " + surt); + LOGGER.fine("EXCLUSION-MAP: adding " + surt); newMap.put(surt, null); } itr.close(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -26,8 +26,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.IPRange; import org.archive.wayback.util.operator.BooleanOperator; @@ -62,7 +62,7 @@ if(range.setRange(ip)) { this.allowedRanges.add(range); } else { - LOGGER.error("Unable to parse range (" + ip + ")"); + LOGGER.severe("Unable to parse range (" + ip + ")"); } } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -28,8 +28,8 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.io.ArchiveFileConstants; import org.archive.io.WriterPoolSettings; import 
org.archive.io.arc.ARCConstants; @@ -55,10 +55,17 @@ ARCCacheDirectory.class.getName()); private int poolWriters = 5; - private int maxPoolWait = 60 * 1000; + private int maxPoolWait = 5 * 1000; private long maxARCSize = ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE; private String arcPrefix = "wayback-live"; + /** + * template string used to configure the ARC writer pool + */ + public static String LIVE_WAYBACK_TEMPLATE = + "${prefix}-${timestamp17}-${serialno}"; + + private File arcDir = null; private ARCWriterPool pool = null; @@ -131,19 +138,11 @@ private WriterPoolSettings getSettings(final boolean isCompressed, final String prefix, final File[] arcDirs) { return new WriterPoolSettings() { - public long getMaxSize() { - return maxARCSize; - } - public List<File> getOutputDirs() { return Arrays.asList(arcDirs); } - public boolean isCompressed() { - return isCompressed; - } - - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked", "rawtypes" }) public List getMetadata() { return null; } @@ -152,9 +151,19 @@ return prefix; } - public String getSuffix() { - return null; + public boolean getCompress() { + // TODO Auto-generated method stub + return isCompressed; } + + public long getMaxFileSizeBytes() { + // TODO Auto-generated method stub + return maxARCSize; + } + + public String getTemplate() { + return LIVE_WAYBACK_TEMPLATE; + } }; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -26,20 +26,22 @@ package org.archive.wayback.liveweb; import java.io.IOException; +import java.util.logging.Logger; import 
javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; import org.archive.wayback.util.webapp.AbstractRequestHandler; +import org.archive.wayback.util.webapp.ShutdownListener; /** * @author brad * */ -public class ARCRecordingProxy extends AbstractRequestHandler { +public class ARCRecordingProxy extends AbstractRequestHandler +implements ShutdownListener { private final static String EXPIRES_HEADER = "Expires"; private long expiresMS = 60 * 60 * 1000; @@ -142,4 +144,9 @@ public void setFakeExpiresMS(long fakeExpiresMS) { this.fakeExpiresMS = fakeExpiresMS; } + + public void shutdown() { + arcCacheDir.shutdown(); + + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -28,6 +28,7 @@ import java.io.IOException; import java.util.Iterator; import java.util.Map; +import java.util.logging.Logger; import java.util.zip.GZIPInputStream; import javax.servlet.ServletException; @@ -39,7 +40,6 @@ import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.log4j.Logger; import org.archive.io.arc.ARCRecord; import org.archive.wayback.core.Resource; import org.archive.wayback.exception.ResourceNotAvailableException; @@ -97,7 +97,7 @@ try { res = ResourceFactory.ARCArchiveRecordToResource(r, null); } catch (ResourceNotAvailableException e) { - 
LOGGER.error(e); + LOGGER.severe(e.getMessage()); throw new IOException(e); } httpResponse.setStatus(res.getStatusCode()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -29,8 +29,10 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.net.ConnectException; import java.net.UnknownHostException; import java.util.Date; +import java.util.logging.Logger; import org.apache.commons.httpclient.ConnectTimeoutException; import org.apache.commons.httpclient.Header; @@ -42,8 +44,6 @@ import org.apache.commons.httpclient.SimpleHttpConnectionManager; import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.cookie.CookiePolicy; -import org.apache.commons.httpclient.params.HttpClientParams; -import org.apache.log4j.Logger; import org.archive.httpclient.HttpRecorderGetMethod; import org.archive.io.RecordingInputStream; import org.archive.io.arc.ARCWriter; @@ -78,6 +78,8 @@ private int socketTimeoutMS = 10000; private int outBufferSize = 1024 * 100; private int inBufferSize = 1024 * 100; +// private int outBufferSize = 10; +// private int inBufferSize = 100; private final ThreadLocal<HttpClient> tl = new ThreadLocal<HttpClient>() { @@ -87,9 +89,6 @@ manager.getParams().setConnectionTimeout(connectionTimeoutMS); manager.getParams().setSoTimeout(socketTimeoutMS); http.setHttpConnectionManager(manager); - HttpClientParams clientParams = new HttpClientParams(); -// LOGGER.warn("Setting HTTP UserAgent to " + userAgent); -// clientParams.setParameter("http.useragent", 
userAgent); return http; } }; @@ -139,17 +138,21 @@ getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); - ByteOp.discardStream(getMethod.getResponseBodyAsStream()); +// ByteOp.discardStream(getMethod.getResponseBodyAsStream()); + ByteOp.copyStream(getMethod.getResponseBodyAsStream(), System.out); getMethod.releaseConnection(); gotUrl = true; } catch (URIException e) { e.printStackTrace(); } catch (UnknownHostException e) { - LOGGER.warn("Unknown host for " + url); + LOGGER.warning("Unknown host for " + url); } catch (ConnectTimeoutException e) { // TODO: should we act like it's a full block? - LOGGER.warn("Timeout out connecting to " + url); + LOGGER.warning("Timeout out connecting to " + url); + } catch (ConnectException e) { + LOGGER.warning("ConnectionRefused to " + url); + } catch (HttpException e) { e.printStackTrace(); // we have to let IOExceptions out, problems caused by local disk Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleRequestParser.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeBundleRequestParser.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -1,8 +1,9 @@ package org.archive.wayback.memento; +import java.util.logging.Logger; + import javax.servlet.http.HttpServletRequest; -import org.apache.log4j.Logger; import org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; @@ -37,7 +38,7 @@ BetterRequestException { String requestPath = accessPoint.translateRequestPathQuery(httpRequest); - 
LOGGER.trace("requestpath:" + requestPath); + LOGGER.fine("requestpath:" + requestPath); if (requestPath.startsWith("timebundle")) { @@ -72,9 +73,9 @@ .substring(requestPath.indexOf("/") + 1); String format = urlStrplus.substring(0, urlStrplus.indexOf("/")); - LOGGER.trace("format:" + format); + LOGGER.fine("format:" + format); String urlStr = urlStrplus.substring(urlStrplus.indexOf("/") + 1); - LOGGER.trace("id:" + urlStr); + LOGGER.fine("id:" + urlStr); WaybackRequest wbRequest = new WaybackRequest(); if (wbRequest.getStartTimestamp() == null) { wbRequest.setStartTimestamp(getEarliestTimestamp()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeGateRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeGateRequestParser.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/memento/TimeGateRequestParser.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -5,10 +5,10 @@ import java.util.Date; import java.util.Iterator; import java.util.List; +import java.util.logging.Logger; import javax.servlet.http.HttpServletRequest; -import org.apache.log4j.Logger; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.BetterRequestException; @@ -55,7 +55,7 @@ String base = accessPoint.translateRequestPath(httpRequest); String requestPath = accessPoint.translateRequestPathQuery(httpRequest); - LOGGER.trace("requestPath:" + requestPath); + LOGGER.fine("requestPath:" + requestPath); if (base.startsWith(MEMENTO_BASE)) { // strip leading "timegate/": Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -30,12 +30,12 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; +import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import org.apache.log4j.Logger; import org.archive.wayback.ResourceIndex; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -28,12 +28,12 @@ import java.io.IOException; import java.net.URL; import java.net.URLConnection; +import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import org.apache.log4j.Logger; import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; @@ -96,7 +96,7 @@ if(factory != null) { builder = factory.newDocumentBuilder(); if (!builder.isNamespaceAware()) { - LOGGER.error("Builder is not 
namespace aware."); + LOGGER.severe("Builder is not namespace aware."); } } } catch (ParserConfigurationException e) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -29,8 +29,8 @@ import java.util.HashMap; import java.util.Iterator; import java.util.NoSuchElementException; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.UrlSearchResult; import org.archive.wayback.util.CloseableIterator; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -24,8 +24,9 @@ */ package org.archive.wayback.resourceindex.bdb; +import java.util.logging.Logger; + import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; @@ -74,7 +75,7 @@ urlKey = 
canonicalizer.urlStringToKey(origUrl); } catch (URIException e) { // e.printStackTrace(); - LOGGER.warn("FAILED canonicalize(" + origUrl +")"); + LOGGER.warning("FAILED canonicalize(" + origUrl +")"); urlKey = origUrl; } keySB.append(urlKey); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatToSearchResultAdapter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatToSearchResultAdapter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -25,7 +25,8 @@ package org.archive.wayback.resourceindex.cdx; -import org.apache.log4j.Logger; +import java.util.logging.Logger; + import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.format.CDXFormat; import org.archive.wayback.resourceindex.cdx.format.CDXFormatException; @@ -44,7 +45,7 @@ try { return cdx.parseResult(line); } catch (CDXFormatException e) { - LOGGER.warn("CDXFormat(" + line + "):"+e.getLocalizedMessage()); + LOGGER.warning("CDXFormat(" + line + "):"+e.getLocalizedMessage()); } return null; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -25,7 +25,8 @@ package 
org.archive.wayback.resourceindex.cdx; -import org.apache.log4j.Logger; +import java.util.logging.Logger; + import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.url.UrlOperations; @@ -109,7 +110,7 @@ try { compressedOffset = Long.parseLong(tokens[nextToken]); } catch (NumberFormatException e) { - LOGGER.warn("Bad compressed Offset field("+nextToken+") in (" + + LOGGER.warning("Bad compressed Offset field("+nextToken+") in (" + line +")"); return null; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -32,18 +32,15 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Set; +import java.util.logging.Logger; import java.util.regex.Pattern; -import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; -import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.util.FileDownloader; import org.archive.wayback.resourceindex.CompositeSearchResultSource; import org.archive.wayback.resourceindex.cdx.CDXIndex; -import org.archive.wayback.resourceindex.cdx.dynamic.CDXDefinitionFile; -import org.archive.wayback.resourceindex.cdx.dynamic.MD5LocationFile; -import org.archive.wayback.resourceindex.cdx.dynamic.RangeAssignmentFile; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.FileDownloader; /** * A CompositeSearchResultSource that 
autmatically manages it's list of sources @@ -274,7 +271,7 @@ break; } else { tmpTarget.delete(); - LOGGER.warn("Bad file contents. Location(" + + LOGGER.warning("Bad file contents. Location(" + loc +") should have MD5(" + neededMD5 + ") but has MD5(" + gotMD5 +")"); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -31,9 +31,9 @@ import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; +import java.util.logging.Logger; import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.SearchResults; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -24,7 +24,8 @@ */ package org.archive.wayback.resourceindex.filters; -import org.apache.log4j.Logger; +import java.util.logging.Logger; + import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; @@ -55,7 +56,7 @@ public 
int filterObject(CaptureSearchResult r) { recordsScanned++; if(recordsScanned > maxRecordsToScan) { - LOGGER.warn("Hit max requests on " + r.getUrlKey() + " " + LOGGER.warning("Hit max requests on " + r.getUrlKey() + " " + r.getCaptureTimestamp()); return FILTER_ABORT; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -24,20 +24,20 @@ */ package org.archive.wayback.resourceindex.updater; +import java.io.BufferedOutputStream; import java.io.File; -import java.io.BufferedOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.Iterator; +import java.util.logging.Logger; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.InputStreamRequestEntity; import org.apache.commons.httpclient.methods.PutMethod; -import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; @@ -114,7 +114,7 @@ } File toBeMergedFile = new File(toBeMergedDir,base); if(toBeMergedFile.exists()) { - LOGGER.warn("WARNING: "+toBeMergedFile.getAbsolutePath() + + LOGGER.warning("WARNING: "+toBeMergedFile.getAbsolutePath() + "already exists!"); } else { if(cdx.renameTo(toBeMergedFile)) { @@ -122,7 +122,7 @@ " for merging."); added = true; } else { - 
LOGGER.error("FAILED rename("+cdx.getAbsolutePath()+ + LOGGER.severe("FAILED rename("+cdx.getAbsolutePath()+ ") to ("+toBeMergedFile.getAbsolutePath()+")"); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -26,8 +26,8 @@ import java.io.File; import java.io.IOException; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ConfigurationException; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -30,10 +30,9 @@ import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; +import java.util.logging.Logger; import java.util.zip.GZIPInputStream; -import org.apache.log4j.Logger; - /** * @author brad * @@ -74,7 +73,7 @@ StringBuilder sb = new StringBuilder(16); sb.append(BYTES_HEADER).append(offset).append(BYTES_MINUS); sb.append((offset + BLOCK_SIZE)-1); - LOGGER.trace("Reading block:" + urlOrPath + 
"("+sb.toString()+")"); + LOGGER.fine("Reading block:" + urlOrPath + "("+sb.toString()+")"); // TODO: timeouts URL u = new URL(urlOrPath); URLConnection uc = u.openConnection(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -29,19 +29,15 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.RandomAccessFile; import java.util.Iterator; import java.util.List; -import java.util.RandomAccess; +import java.util.logging.Logger; import java.util.zip.GZIPInputStream; -import org.apache.log4j.Logger; -import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.RuntimeIOException; import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.webapp.AccessPoint; /** * @author brad Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -30,8 +30,8 @@ import java.util.ArrayList; 
import java.util.HashMap; import java.util.Iterator; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.SearchResultSource; @@ -97,7 +97,7 @@ String line = lines.next(); String[] parts = line.split("\\s"); if(parts.length != 2) { - LOGGER.error("Bad line(" + line +") in (" + + LOGGER.severe("Bad line(" + line +") in (" + chunkMapPath + ")"); throw new IOException("Bad line(" + line +") in (" + chunkMapPath + ")"); @@ -151,7 +151,7 @@ numBlocks++; String parts[] = blockDescriptor.split("\t"); if(parts.length != 3) { - LOGGER.error("Bad line(" + blockDescriptor +") in (" + + LOGGER.severe("Bad line(" + blockDescriptor +") in (" + chunkMapPath + ")"); throw new ResourceIndexNotAvailableException("Bad line(" + blockDescriptor + ")"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -25,11 +25,11 @@ package org.archive.wayback.resourcestore; import java.io.IOException; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.ResourceStore; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import 
org.archive.wayback.resourcestore.resourcefile.ResourceFactory; @@ -85,7 +85,7 @@ // which means we've already read some } catch (IOException e) { - LOGGER.warn("Unable to retrieve resource from " + url); + LOGGER.warning("Unable to retrieve resource from " + url); } if(r != null) { break; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/HTTPRecordAnnotater.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -27,14 +27,14 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.util.logging.Logger; import org.apache.commons.httpclient.Header; -import org.apache.log4j.Logger; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.htmllex.ContextAwareLexer; +import org.archive.wayback.util.htmllex.ParseContext; import org.archive.wayback.util.htmllex.ParseEventDelegator; -import org.archive.wayback.util.htmllex.ParseContext; import org.archive.wayback.util.url.UrlOperations; import org.htmlparser.Node; import org.htmlparser.lexer.Lexer; @@ -156,13 +156,13 @@ } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); - LOGGER.warn(fileContext + " " + e.getLocalizedMessage()); + LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); - LOGGER.warn(fileContext + " " + e.getLocalizedMessage()); + LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } catch (IOException e) { - 
LOGGER.warn(fileContext + " " + e.getLocalizedMessage()); + LOGGER.warning(fileContext + " " + e.getLocalizedMessage()); } } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -29,8 +29,8 @@ import java.io.FileReader; import java.io.IOException; import java.io.PrintWriter; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import org.archive.wayback.util.CloseableIterator; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -28,8 +28,8 @@ import java.io.IOException; import java.io.PrintWriter; import java.util.Iterator; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; @@ -103,7 +103,7 @@ try { pathsOrUrls = db.nameToUrls(name); } catch(IOException e) { - LOGGER.error("FAILED TO LOOKUP(" + name + ")" + + 
LOGGER.severe("FAILED TO LOOKUP(" + name + ")" + e.getLocalizedMessage()); return false; } @@ -118,7 +118,7 @@ } } } catch(IOException e) { - LOGGER.error("FAILED to index or upload (" + name + ")"); + LOGGER.severe("FAILED to index or upload (" + name + ")"); e.printStackTrace(); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -26,13 +26,13 @@ import java.io.File; import java.io.IOException; +import java.util.logging.Logger; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HttpParser; import org.apache.commons.httpclient.StatusLine; import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.util.EncodingUtil; -import org.apache.log4j.Logger; import org.archive.io.ArchiveRecordHeader; import org.archive.io.RecoverableIOException; import org.archive.io.arc.ARCConstants; @@ -166,7 +166,7 @@ String urlKey = canonicalizer.urlStringToKey(origUrl); result.setUrlKey(urlKey); } catch (URIException e) { - LOGGER.warn("FAILED canonicalize(" + origUrl + "):" + + LOGGER.warning("FAILED canonicalize(" + origUrl + "):" + file + " " + offset); result.setUrlKey(origUrl); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -33,6 +33,7 @@ import java.net.InetSocketAddress; import java.net.Socket; import java.net.URL; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -41,7 +42,6 @@ import javax.servlet.http.HttpServletResponse; import org.apache.commons.httpclient.ChunkedInputStream; -import org.apache.log4j.Logger; import org.archive.util.anvl.ANVLRecord; import org.archive.wayback.util.http.HttpRequestMessage; import org.archive.wayback.util.http.HttpResponse; @@ -96,7 +96,7 @@ if(urls == null || urls.length == 0) { - LOGGER.warn("No locations for " + location.getName()); + LOGGER.warning("No locations for " + location.getName()); httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, "Unable to locate("+ location.getName() +")"); } else { @@ -110,12 +110,12 @@ break; } } catch(IOException e) { - LOGGER.warn("failed proxy of " + urlString + " " + + LOGGER.warning("failed proxy of " + urlString + " " + e.getLocalizedMessage()); } } if(ds == null) { - LOGGER.warn("No successful locations for " + + LOGGER.warning("No successful locations for " + location.getName()); httpResponse.sendError(HttpServletResponse.SC_BAD_GATEWAY, "failed proxy of ("+ location.getName() +")"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2010-08-26 23:43:06 UTC (rev 3242) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -31,6 +31,7 @@ import java.io.InputStreamReader; import java.util.Arrays; import java.util.Iterator; +import java.util.logging.Logger; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpStatus; @@ -38,8 +39,6 @@ import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.util.ParameterFormatter; -import org.apache.log4j.Logger; -import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBServlet; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.WrappedCloseableIterator; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBUpdater.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBUpdater.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -27,8 +27,8 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.resourcestore.resourcefile.ResourceFileList; import org.archive.wayback.resourcestore.resourcefile.ResourceFileLocation; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java =================================================================== 
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -29,10 +29,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.logging.Logger; -import org.apache.log4j.Logger; - /** * Local directory tree holding ARC and WARC files. * @@ -87,7 +86,7 @@ } } } else { - LOGGER.warn(root.getAbsolutePath() + " is not a directory."); + LOGGER.warning(root.getAbsolutePath() + " is not a directory."); return; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -28,8 +28,8 @@ import java.io.IOException; import java.util.HashMap; import java.util.Iterator; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.CloseableIterator; @@ -80,7 +80,7 @@ if(location != null) { list.add(location); } else { - LOGGER.warn("Bad parse of line(" + line + ") in (" + + LOGGER.warning("Bad parse of line(" + line + ") in (" + source.getAbsolutePath() + ")"); } } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -27,8 +27,8 @@ import java.io.File; import java.io.IOException; import java.util.List; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBUpdater; import org.archive.wayback.util.DirMaker; @@ -87,7 +87,7 @@ LOGGER.info("Synchronized " + name); } catch (IOException e) { e.printStackTrace(); - LOGGER.warn("FAILED Synchronize " + name + e.getMessage()); + LOGGER.warning("FAILED Synchronize " + name + e.getMessage()); } } @@ -120,7 +120,7 @@ if(sleepInterval > 0) { sleep(sleepInterval); } else { - LOGGER.warn("Last Synchronize took " + syncDuration + + LOGGER.warning("Last Synchronize took " + syncDuration + " where interval is " + interval + ". 
Not sleeping."); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -32,8 +32,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.io.arc.ARCConstants; import org.archive.io.arc.ARCWriter; import org.archive.util.ArchiveUtils; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -7,8 +7,8 @@ import java.util.Iterator; import java.util.List; import java.util.TimeZone; +import java.util.logging.Logger; -import org.apache.log4j.Logger; import org.archive.wayback.util.partition.size.DayPartitionSize; import org.archive.wayback.util.partition.size.HourPartitionSize; import org.archive.wayback.util.partition.size.MonthPartitionSize; @@ -200,7 +200,7 @@ } if(itr.hasNext()) { // eew... Likely bad usage. is this an error? 
- LOGGER.warn("Not all elements fit in partitions!"); + LOGGER.warning("Not all elements fit in partitions!"); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -389,6 +389,11 @@ line + ") skipped (" + parts[column] + ")"); e.printStackTrace(); continue; + } catch (StringIndexOutOfBoundsException e) { + System.err.println("Invalid URL in line " + lineNumber + " (" + + line + ") skipped (" + parts[column] + ")"); + e.printStackTrace(); + continue; } } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-08-26 23:43:06 UTC (rev 3242) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-08-27 23:35:14 UTC (rev 3243) @@ -24,11 +24,11 @@ */ package org.archive.wayback.util.url; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; import org.archive.net.UURI; import org.archive.net.UURIFactory; @@ -162,7 +162,7 @@ try { return UURIFactory.getInstance(url).getEscapedURI(); } catch (URIException e) { - LOGGER.warn(e.getLocalizedMessage() + ": " + url); + LOGGER.warning(e.getLocalizedMessage() + ": " + url); // can't let a space exist... 
send back close to whatever came // in... return url.replace(" ", "%20"); @@ -175,7 +175,7 @@ absBaseURI = UURIFactory.getInstance(baseUrl); resolvedURI = UURIFactory.getInstance(absBaseURI, url); } catch (URIException e) { - LOGGER.warn(e.getLocalizedMessage() + ": " + url); + LOGGER.warning(e.getLocalizedMessage(... [truncated message content] |
From: <bra...@us...> - 2010-10-11 19:55:33
|
Revision: 3275 http://archive-access.svn.sourceforge.net/archive-access/?rev=3275&view=rev Author: bradtofel Date: 2010-10-11 19:55:26 +0000 (Mon, 11 Oct 2010) Log Message: ----------- REFACTOR: moved doRedirection() logic into method which can be overridden, enabling specific Replays to customize redirect URLs. Added ArchivalURL implementation which forwards datespec flags with the redirected URLs Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectingClosestResultSelector.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectingClosestResultSelector.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectingClosestResultSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectingClosestResultSelector.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectingClosestResultSelector.java 2010-10-11 19:55:26 UTC (rev 3275) @@ -0,0 +1,42 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.wayback.archivalurl; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BetterRequestException; +import org.archive.wayback.replay.DateRedirectingClosestResultSelector; + +/** + * @author brad + * + */ +public class ArchivalUrlDateRedirectingClosestResultSelector +extends DateRedirectingClosestResultSelector { + protected void doRedirection(WaybackRequest wbRequest, + CaptureSearchResult closest) throws BetterRequestException { + // redirect to the better version: + ArchivalUrl aUrl = new ArchivalUrl(wbRequest); + String betterUrl = wbRequest.getAccessPoint().getReplayPrefix() + + aUrl.toString(closest.getCaptureTimestamp(), + closest.getOriginalUrl()); + throw new BetterRequestException(betterUrl); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectingClosestResultSelector.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectingClosestResultSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectingClosestResultSelector.java 2010-10-11 18:59:43 UTC (rev 3274) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectingClosestResultSelector.java 2010-10-11 19:55:26 UTC (rev 3275) @@ -53,15 +53,18 @@ } } if(doRedirect) { - // redirect to the better version: - String url = closest.getOriginalUrl(); - String captureDate = closest.getCaptureTimestamp(); - ResultURIConverter uriConverter = - wbRequest.getAccessPoint().getUriConverter(); - String betterURI = uriConverter.makeReplayURI(captureDate,url); - throw new BetterRequestException(betterURI); + doRedirection(wbRequest,closest); } return closest; } - + protected void doRedirection(WaybackRequest wbRequest, + CaptureSearchResult closest) throws BetterRequestException { + // redirect to the better version: + String url = closest.getOriginalUrl(); + String captureDate = closest.getCaptureTimestamp(); + ResultURIConverter uriConverter = + wbRequest.getAccessPoint().getUriConverter(); + String betterURI = uriConverter.makeReplayURI(captureDate,url); + throw new BetterRequestException(betterURI); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-03-09 05:56:23
|
Revision: 3429 http://archive-access.svn.sourceforge.net/archive-access/?rev=3429&view=rev Author: bradtofel Date: 2011-03-09 05:56:17 +0000 (Wed, 09 Mar 2011) Log Message: ----------- FEATURE: Now detects a LiveWebTimeout, or LiveRobotsTimeout request, and aborts subsequent attempts, which are also likely to time out within this request. More of a BugFix, since this caused dramatic problems, hanging up the thread to time out on robots request for each search result. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2011-03-09 05:53:57 UTC (rev 3428) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2011-03-09 05:56:17 UTC (rev 3429) @@ -35,6 +35,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.LiveWebCacheUnavailableException; +import org.archive.wayback.exception.LiveWebTimeoutException; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; @@ -188,7 +189,7 @@ rulesCache.put(firstUrlString,tmpRules); rules = tmpRules; LOGGER.info("ROBOT: Downloaded("+urlString+")"); - + } catch (LiveDocumentNotAvailableException e) { LOGGER.info("ROBOT: 
LiveDocumentNotAvailableException("+urlString+")"); @@ -201,7 +202,12 @@ return null; } catch (LiveWebCacheUnavailableException e) { LOGGER.info("ROBOT: LiveWebCacheUnavailableException("+urlString+")"); + filterGroup.setLiveWebGone(); return null; + } catch (LiveWebTimeoutException e) { + LOGGER.info("ROBOT: LiveDocumentTimedOutException("+urlString+")"); + filterGroup.setRobotTimedOut(); + return null; } } } @@ -226,7 +232,11 @@ } int filterResult = ObjectFilter.FILTER_EXCLUDE; RobotRules rules = getRules(r); - if(rules != null) { + if(rules == null) { + if(filterGroup.getRobotTimedOut() || filterGroup.getLiveWebGone()) { + return ObjectFilter.FILTER_ABORT; + } + } else { String resultURL = r.getOriginalUrl(); URL url; try { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2011-03-09 05:53:57 UTC (rev 3428) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/ExclusionCaptureFilterGroup.java 2011-03-09 05:56:17 UTC (rev 3429) @@ -26,8 +26,11 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.AdministrativeAccessControlException; +import org.archive.wayback.exception.LiveWebCacheUnavailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.RobotAccessControlException; +import org.archive.wayback.exception.RobotNotAvailableException; +import org.archive.wayback.exception.RobotTimedOutAccessControlException; import org.archive.wayback.resourceindex.filters.CounterFilter; import 
org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.ObjectFilter; @@ -41,6 +44,8 @@ String requestUrl = null; private boolean sawRobots = false; private boolean passedRobots = false; + private boolean robotTimedOut = false; + private boolean liveWebGone = false; private boolean sawAdministrative = false; private boolean passedAdministrative = false; @@ -67,7 +72,16 @@ } public void annotateResults(SearchResults results) - throws AccessControlException, ResourceNotInArchiveException { + throws AccessControlException, ResourceNotInArchiveException, + RobotNotAvailableException { + if(robotTimedOut) { + throw new RobotTimedOutAccessControlException("Unable to check" + + " robots.txt for " + requestUrl); + } + if(liveWebGone) { + throw new RobotNotAvailableException("The URL " + requestUrl + + " is blocked by the sites robots.txt file"); + } if(sawRobots && !passedRobots) { throw new RobotAccessControlException("The URL " + requestUrl + " is blocked by the sites robots.txt file"); @@ -91,4 +105,18 @@ public void setSawAdministrative() { sawAdministrative = true; } + + public void setRobotTimedOut() { + robotTimedOut = true; + } + public boolean getRobotTimedOut() { + return robotTimedOut; + } + + public void setLiveWebGone() { + liveWebGone = true; + } + public boolean getLiveWebGone() { + return liveWebGone; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-05-25 01:36:55
|
Revision: 3450 http://archive-access.svn.sourceforge.net/archive-access/?rev=3450&view=rev Author: bradtofel Date: 2011-05-25 01:36:49 +0000 (Wed, 25 May 2011) Log Message: ----------- FEATURE: added FrameWrapped state tracking code Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/FrameEmbeddedRequestSelector.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/IFrameEmbeddedRequestSelector.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2011-05-25 01:33:03 UTC (rev 3449) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2011-05-25 01:36:49 UTC (rev 3450) @@ -53,7 +53,7 @@ public class FastArchivalUrlReplayParseEventHandler implements ParseEventHandler { - private final static String FERRET_DONE_KEY = + public final static String FERRET_DONE_KEY = FastArchivalUrlReplayParseEventHandler.class.toString(); private String jspInsertPath = "/WEB-INF/replay/DisclaimChooser.jsp"; @@ -79,6 +79,8 @@ anchorUrlTrans = new URLStringTransformer(); anchorUrlTrans.setJsTransformer(jsBlockTrans); } + private static URLStringTransformer framesetUrlTrans = + new URLStringTransformer("fw_"); private static URLStringTransformer cssUrlTrans = new URLStringTransformer("cs_"); private static URLStringTransformer jsUrlTrans = @@ -227,7 +229,7 @@ transformAttr(context, tagNode, 
"ACTION", anchorUrlTrans); } else if(tagName.equals("FRAME")) { - transformAttr(context, tagNode, "SRC", anchorUrlTrans); + transformAttr(context, tagNode, "SRC", framesetUrlTrans); } else if(tagName.equals("LINK")) { if(transformAttrWhere(context, tagNode, "REL", "STYLESHEET", Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/FrameEmbeddedRequestSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/FrameEmbeddedRequestSelector.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/FrameEmbeddedRequestSelector.java 2011-05-25 01:36:49 UTC (rev 3450) @@ -0,0 +1,18 @@ +package org.archive.wayback.replay.selector; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.WaybackRequest; + +public class FrameEmbeddedRequestSelector extends BaseReplayRendererSelector { + + /* (non-Javadoc) + * @see org.archive.wayback.replay.selector.BaseReplayRendererSelector#canHandle(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.CaptureSearchResult, org.archive.wayback.core.Resource) + */ + @Override + public boolean canHandle(WaybackRequest wbRequest, + CaptureSearchResult result, Resource resource) { + return wbRequest.isFrameWrapperContext(); + } + +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/IFrameEmbeddedRequestSelector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/IFrameEmbeddedRequestSelector.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/selector/IFrameEmbeddedRequestSelector.java 2011-05-25 01:36:49 UTC (rev 3450) @@ -0,0 +1,18 @@ +package org.archive.wayback.replay.selector; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.WaybackRequest; + +public class IFrameEmbeddedRequestSelector extends BaseReplayRendererSelector { + + /* (non-Javadoc) + * @see org.archive.wayback.replay.selector.BaseReplayRendererSelector#canHandle(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.CaptureSearchResult, org.archive.wayback.core.Resource) + */ + @Override + public boolean canHandle(WaybackRequest wbRequest, + CaptureSearchResult result, Resource resource) { + return wbRequest.isIFrameWrapperContext(); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-05-25 01:47:00
|
Revision: 3455 http://archive-access.svn.sourceforge.net/archive-access/?rev=3455&view=rev Author: bradtofel Date: 2011-05-25 01:46:53 +0000 (Wed, 25 May 2011) Log Message: ----------- OPTIMIZ: now use static reference to ByteOp.UTF8 Charset object. Previously, it was either being "assumed" as default, as in, not specified, or referenced by name, causing a lookup of the Charset object, which was causing lock contention Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -31,6 +31,8 @@ import java.util.logging.Logger; +import org.archive.wayback.util.ByteOp; + /** * Class which parses a robots.txt file, storing the rules contained therein, * and then allows for testing if path/userAgent tuples are blocked by those @@ -80,7 +82,7 @@ public void parse(InputStream is) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader( - (InputStream) is)); + (InputStream) is,ByteOp.UTF8)); String read; ArrayList<String> current = null; while (br != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -34,6 +34,7 @@ import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.bdb.BDBRecord; import org.archive.wayback.util.bdb.BDBRecordSet; @@ -204,7 +205,7 @@ } else if(op.compareTo("-w") == 0) { BufferedReader br = new BufferedReader( - new InputStreamReader(System.in)); + new InputStreamReader(System.in,ByteOp.UTF8)); RecordIterator itrS = new RecordIterator(br); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -24,6 +24,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.bdb.BDBRecord; /** @@ -50,18 +51,13 @@ */ public CaptureSearchResult adapt(BDBRecord record) { sb.setLength(0); - try { - String key = new String(record.getKey().getData(),"UTF-8"); - int urlEnd = key.indexOf(' '); - int dateSpecEnd = key.indexOf(' ',urlEnd 
+ 1); - sb.append(key.substring(0,dateSpecEnd)); - sb.append(" "); - sb.append(new String(record.getValue().getData(),"UTF-8")); - sb.append(key.substring(dateSpecEnd)); - } catch (UnsupportedEncodingException e) { - // should not happen with UTF-8 hard-coded.. - e.printStackTrace(); - } + String key = new String(record.getKey().getData(),ByteOp.UTF8); + int urlEnd = key.indexOf(' '); + int dateSpecEnd = key.indexOf(' ',urlEnd + 1); + sb.append(key.substring(0,dateSpecEnd)); + sb.append(" "); + sb.append(new String(record.getValue().getData(),ByteOp.UTF8)); + sb.append(key.substring(dateSpecEnd)); return CDXLineToSearchResultAdapter.doAdapt(sb.toString()); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -20,14 +20,17 @@ package org.archive.wayback.resourceindex.cdx; import java.io.BufferedReader; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.util.Iterator; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.format.CDXFormat; import org.archive.wayback.resourceindex.cdx.format.CDXFormatException; import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; public class CDXFormatIndex extends CDXIndex { @@ -44,7 +47,9 @@ try { // BUGBUG: I don't think java will let us do much better than // this... No way to stat() a filehandle, right? 
- BufferedReader fr = new BufferedReader(new FileReader(file)); + FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis,ByteOp.UTF8); + BufferedReader fr = new BufferedReader(isr); cdx = new CDXFormat(fr.readLine()); lastMod = nowMod; fr.close(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -40,6 +40,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.archive.wayback.util.ByteOp; + /** * Filter that accepts PUT HTTP requests to insert CDX files into the incoming * directory for a local BDBIndex. 
@@ -152,7 +154,7 @@ InputStream input; input = request.getInputStream(); BufferedInputStream in = new BufferedInputStream(input); - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + BufferedReader reader = new BufferedReader(new InputStreamReader(in,ByteOp.UTF8)); FileWriter out = new FileWriter(tmpFile); while ((i = reader.read()) != -1) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -27,6 +27,8 @@ import java.util.logging.Logger; import java.util.zip.GZIPInputStream; +import org.archive.wayback.util.ByteOp; + /** * @author brad * @@ -73,6 +75,6 @@ URLConnection uc = u.openConnection(); uc.setRequestProperty(RANGE_HEADER, sb.toString()); return new BufferedReader(new InputStreamReader( - new GZIPInputStream(uc.getInputStream()))); + new GZIPInputStream(uc.getInputStream()),ByteOp.UTF8)); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -31,6 +31,7 @@ import java.util.zip.GZIPInputStream; import org.archive.wayback.exception.RuntimeIOException; 
+import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; /** @@ -134,7 +135,7 @@ long offset = i * ZiplinedBlock.BLOCK_SIZE; raf.seek(offset); BufferedReader br = new BufferedReader(new InputStreamReader( - new GZIPInputStream(new FileInputStream(raf.getFD())))); + new GZIPInputStream(new FileInputStream(raf.getFD())),ByteOp.UTF8)); String line = br.readLine(); if(line == null) { System.err.println("Bad block at " + offset + " in " + args[0]); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -21,13 +21,16 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.logging.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.DirMaker; @@ -102,7 +105,9 @@ public long getLastMark() throws IOException { long mark = 0; if(file.isFile() && file.length() > 0) { - BufferedReader ir = new BufferedReader(new FileReader(file)); + FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis,ByteOp.UTF8); + BufferedReader ir = new BufferedReader(isr); String line = ir.readLine(); if(line != null) { mark = Long.parseLong(line); Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -25,6 +25,7 @@ import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBLog; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.bdb.BDBRecordSet; @@ -277,7 +278,7 @@ db.setBdbName(bdbName); db.setLogPath(logPath); BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); + new InputStreamReader(System.in,ByteOp.UTF8)); String line; int exitCode = 0; try { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -34,6 +34,7 @@ import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.util.ParameterFormatter; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import 
org.archive.wayback.util.WrappedCloseableIterator; @@ -265,7 +266,7 @@ if(operation.equalsIgnoreCase("add-stream")) { BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); + new InputStreamReader(System.in,ByteOp.UTF8)); String line; try { while((line = r.readLine()) != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -21,11 +21,14 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; import java.io.RandomAccessFile; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.flatfile.RecordIterator; @@ -87,7 +90,9 @@ RandomAccessFile raf = new RandomAccessFile(this, "r"); raf.seek(start); - BufferedReader is = new BufferedReader(new FileReader(raf.getFD())); + FileInputStream fis = new FileInputStream(raf.getFD()); + InputStreamReader isr = new InputStreamReader(fis,ByteOp.UTF8); + BufferedReader is = new BufferedReader(isr); return new BufferedRangeIterator(new RecordIterator(is),end - start); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -24,6 +24,8 @@ import java.io.InputStreamReader; import java.net.URL; +import org.archive.wayback.util.ByteOp; + /** * * @@ -57,7 +59,7 @@ String url = "http://localhost:8080" + jsp + "?url=" + prefix; URL u = new URL(url); InputStream is = u.openStream(); - InputStreamReader isr = new InputStreamReader(is); + InputStreamReader isr = new InputStreamReader(is,ByteOp.UTF8); StringBuilder sb = new StringBuilder(2000); int READ_SIZE = 2048; char cbuf[] = new char[READ_SIZE]; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -27,7 +27,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.archive.wayback.util.ByteOp; + /** * * @@ -61,7 +63,7 @@ public static List<String> extractLinks(final String url) throws IOException { URL u = new URL(url); InputStream is = u.openStream(); - InputStreamReader isr = new InputStreamReader(is); + InputStreamReader isr = new InputStreamReader(is,ByteOp.UTF8); StringBuilder sb = new StringBuilder(2000); int READ_SIZE = 2048; char cbuf[] = new char[READ_SIZE]; Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.charset.Charset; /** * Byte oriented static methods. Likely a lot of overlap with apache- commons @@ -33,6 +34,7 @@ public class ByteOp { /** Default buffer size for IO ops */ public final static int BUFFER_SIZE = 4096; + public final static Charset UTF8 = Charset.forName("utf-8"); /** * Create a new byte array with contents initialized to values from the Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -24,7 +24,9 @@ import java.io.UnsupportedEncodingException; import java.util.Iterator; +import org.archive.wayback.util.ByteOp; + import com.sleepycat.je.Cursor; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; @@ -119,26 +121,14 @@ * @return byte array representation of String s in UTF-8 */ public static byte[] stringToBytes(String s) { - try { - return s.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - // no UTF-8, huh? 
- e.printStackTrace(); - return s.getBytes(); - } + return s.getBytes(ByteOp.UTF8); } /** * @param ba * @return String of UTF-8 encoded bytes ba */ public static String bytesToString(byte[] ba) { - try { - return new String(ba,"UTF-8"); - } catch (UnsupportedEncodingException e) { - // not likely.. - e.printStackTrace(); - return new String(ba); - } + return new String(ba,ByteOp.UTF8); } /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -21,13 +21,16 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.RandomAccessFile; import java.util.Comparator; import java.util.Iterator; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.CompositeSortedIterator; @@ -180,7 +183,9 @@ RandomAccessFile raf = new RandomAccessFile(file,"r"); long offset = findKeyOffset(raf,prefix); lastMatchOffset = offset; - BufferedReader br = new BufferedReader(new FileReader(raf.getFD())); + FileInputStream is = new FileInputStream(raf.getFD()); + InputStreamReader isr = new InputStreamReader(is, ByteOp.UTF8); + BufferedReader br = new BufferedReader(isr); itr = new RecordIterator(br); return itr; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -31,6 +31,7 @@ import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.archive.wayback.UrlCanonicalizer; +import org.archive.wayback.util.ByteOp; /** * Class that performs the standard Heritrix URL canonicalization. Eventually, @@ -365,7 +366,7 @@ for(int idx = 0; idx < columns.size(); idx++) { cols[idx] = columns.get(idx).intValue() - 1; } - BufferedReader r = new BufferedReader(new InputStreamReader(System.in)); + BufferedReader r = new BufferedReader(new InputStreamReader(System.in,ByteOp.UTF8)); StringBuilder sb = new StringBuilder(); String line = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-05-25 19:40:35
|
Revision: 3460 http://archive-access.svn.sourceforge.net/archive-access/?rev=3460&view=rev Author: bradtofel Date: 2011-05-25 19:40:28 +0000 (Wed, 25 May 2011) Log Message: ----------- FEATURE: added code to allow parseStart and parseComplete event handlers Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventDelegator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventHandler.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/ParseStartHandler.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2011-05-25 01:51:34 UTC (rev 3459) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2011-05-25 19:40:28 UTC (rev 3460) @@ -167,6 +167,7 @@ ContextAwareLexer lex = new ContextAwareLexer(lexer, context); Node node; try { + delegator.handleParseStart(context); while((node = lex.nextNode()) != null) { delegator.handleNode(context, node); } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2011-05-25 01:51:34 UTC (rev 3459) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2011-05-25 19:40:28 UTC (rev 3460) @@ -57,7 +57,8 @@ FastArchivalUrlReplayParseEventHandler.class.toString(); private String jspInsertPath = "/WEB-INF/replay/DisclaimChooser.jsp"; - private String commentJsp = "/WEB-INF/replay/ArchiveComment.jsp"; + private String endJsp = "/WEB-INF/replay/ArchiveComment.jsp"; + private String startJsp = null; private final String[] okHeadTags = { "![CDATA[*", "![CDATA[", "?", "!DOCTYPE", "HTML", "HEAD", "BASE", "LINK", "META", "TITLE", @@ -332,13 +333,14 @@ } return false; } + public void handleParseComplete(ParseContext pContext) throws IOException { - if(commentJsp != null) { + if(endJsp != null) { ReplayParseContext context = (ReplayParseContext) pContext; OutputStream out = context.getOutputStream(); String tmp = null; try { - tmp = context.getJspExec().jspToString(commentJsp); + tmp = context.getJspExec().jspToString(endJsp); } catch (ServletException e) { e.printStackTrace(); } @@ -350,6 +352,24 @@ } } + public void handleParseStart(ParseContext pContext) throws IOException { + if(startJsp != null) { + ReplayParseContext context = (ReplayParseContext) pContext; + OutputStream out = context.getOutputStream(); + String tmp = null; + try { + tmp = context.getJspExec().jspToString(startJsp); + } catch (ServletException e) { + e.printStackTrace(); + } + if(tmp != null) { +// Charset charset = Charset.forName(context.getOutputCharset()); + String charset = context.getOutputCharset(); + 
out.write(tmp.getBytes(charset)); + } + } + } + /** * @return the jspInsertPath */ @@ -366,15 +386,47 @@ /** * @return the commentJsp + * @deprecated use getEndJsp() */ public String getCommentJsp() { - return commentJsp; + return getEndJsp(); } /** * @param commentJsp the commentJsp to set + * @deprecated use setEndJsp() */ public void setCommentJsp(String commentJsp) { - this.commentJsp = commentJsp; + setEndJsp(commentJsp); } + /** + * @return the path to the JSP to execute and include at the start of the + * document + */ + public String getStartsp() { + return startJsp; + } + + /** + * @param endJsp the path to the JSP to execute and include at the start + * of the document + */ + public void setStartJsp(String startJsp) { + this.startJsp = startJsp; + } + /** + * @return the path to the JSP to execute and include at the end of the + * document + */ + public String getEndJsp() { + return endJsp; + } + + /** + * @param endJsp the path to the JSP to execute and include at the end + * of the document + */ + public void setEndJsp(String endJsp) { + this.endJsp = endJsp; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java 2011-05-25 01:51:34 UTC (rev 3459) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java 2011-05-25 19:40:28 UTC (rev 3460) @@ -84,6 +84,12 @@ } + public void handleParseStart(ParseContext context) throws IOException { + preModifyDelegator.handleParseStart(context); + modifyDelegator.handleParseStart(context); + postModifyDelegator.handleParseStart(context); + } + public void handleParseComplete(ParseContext context) throws IOException { 
preModifyDelegator.handleParseComplete(context); modifyDelegator.handleParseComplete(context); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventDelegator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventDelegator.java 2011-05-25 01:51:34 UTC (rev 3459) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventDelegator.java 2011-05-25 19:40:28 UTC (rev 3460) @@ -31,6 +31,7 @@ import org.archive.wayback.util.htmllex.handlers.JSTextHandler; import org.archive.wayback.util.htmllex.handlers.OpenTagHandler; import org.archive.wayback.util.htmllex.handlers.ParseCompleteHandler; +import org.archive.wayback.util.htmllex.handlers.ParseStartHandler; import org.archive.wayback.util.htmllex.handlers.RemarkTextHandler; import org.htmlparser.Node; import org.htmlparser.nodes.RemarkNode; @@ -71,6 +72,7 @@ private List<RemarkTextHandler> remarkTextHandler = null; private List<ContentTextHandler> contentTextHandler = null; private List<ParseCompleteHandler> parseCompleteHandlers = null; + private List<ParseStartHandler> parseStartHandlers = null; private List<ParseEventDelegatorVisitor> parserVisitors = null; @@ -234,6 +236,20 @@ } } + public void addParseStartHandler(ParseStartHandler v) { + if(parseStartHandlers == null) { + parseStartHandlers = new ArrayList<ParseStartHandler>(); + } + parseStartHandlers.add(v); + } + public void handleParseStart(ParseContext context) throws IOException { + if(parseStartHandlers != null) { + for(ParseStartHandler v : parseStartHandlers) { + v.handleParseStart(context); + } + } + } + /** * @return the parserVisitors */ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventHandler.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventHandler.java 2011-05-25 01:51:34 UTC (rev 3459) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseEventHandler.java 2011-05-25 19:40:28 UTC (rev 3460) @@ -32,6 +32,8 @@ * */ public interface ParseEventHandler { + + public void handleParseStart(ParseContext context) throws IOException; public void handleNode(ParseContext context, Node node) throws IOException; public void handleParseComplete(ParseContext context) throws IOException; Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/ParseStartHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/ParseStartHandler.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/ParseStartHandler.java 2011-05-25 19:40:28 UTC (rev 3460) @@ -0,0 +1,10 @@ +package org.archive.wayback.util.htmllex.handlers; + +import java.io.IOException; + +import org.archive.wayback.util.htmllex.ParseContext; + +public interface ParseStartHandler { + public void handleParseStart(ParseContext context) + throws IOException; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-09-06 08:17:58
|
Revision: 3537 http://archive-access.svn.sourceforge.net/archive-access/?rev=3537&view=rev Author: bradtofel Date: 2011-09-06 08:17:52 +0000 (Tue, 06 Sep 2011) Log Message: ----------- INTERFACE heritrix-commons 3.1.0 changes Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2011-09-06 04:24:36 UTC (rev 3536) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2011-09-06 08:17:52 UTC (rev 3537) @@ -133,7 +133,7 @@ private WriterPoolSettings getSettings(final boolean isCompressed, final String prefix, final File[] arcDirs) { return new WriterPoolSettings() { - public List<File> getOutputDirs() { + public List<File> calcOutputDirs() { return Arrays.asList(arcDirs); } @@ -167,6 +167,11 @@ public int getWriteBufferSize() { return 4096; } + +// public List<File> calcOutputDirs() { +// // TODO Auto-generated method stub +// return null; +// } }; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java 2011-09-06 04:24:36 UTC (rev 3536) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java 2011-09-06 
08:17:52 UTC (rev 3537) @@ -126,7 +126,7 @@ private WriterPoolSettings getSettings(final boolean isCompressed, final String prefix, final List<File> arcDirs) { return new WriterPoolSettings() { - public List<File> getOutputDirs() { + public List<File> calcOutputDirs() { return arcDirs; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java 2011-09-06 04:24:36 UTC (rev 3536) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java 2011-09-06 08:17:52 UTC (rev 3537) @@ -62,7 +62,7 @@ private WARCWriterPoolSettings getSettings(final boolean isCompressed, final String prefix, final List<File> arcDirs, final List metadata) { return new WARCWriterPoolSettings() { - public List<File> getOutputDirs() { + public List<File> calcOutputDirs() { return arcDirs; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2011-11-29 03:45:16
|
Revision: 3572 http://archive-access.svn.sourceforge.net/archive-access/?rev=3572&view=rev Author: ikreymer Date: 2011-11-29 03:45:09 +0000 (Tue, 29 Nov 2011) Log Message: ----------- AccessPoint: Added a null check for getRealPath() return value when checking static resources, as some servlet containers (ex. some versions of jetty) may return null for the path, however a redirect to the local resource may still return the resource. LocalResourceIndex: Changed order of filter groups so that AccessPointFilterGroup is first. This is needed so that the FilePrefixFilter check is performed before the WARCRevisit filter to ensure that only records with correct prefix are saved for revisit. Solves an issue where a revisit record was pointing to the latter warc result, but one that was from the wrong file prefix Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-11-19 00:57:07 UTC (rev 3571) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-11-29 03:45:09 UTC (rev 3572) @@ -117,9 +117,9 @@ public LocalResourceIndex() { canonicalizer = new AggressiveUrlCanonicalizer(); fgFactories = new ArrayList<FilterGroupFactory>(); + fgFactories.add(new AccessPointCaptureFilterGroupFactory()); fgFactories.add(new CoreCaptureFilterGroupFactory()); fgFactories.add(new QueryCaptureFilterGroupFactory()); - fgFactories.add(new AccessPointCaptureFilterGroupFactory());
fgFactories.add(new ExclusionCaptureFilterGroupFactory()); fgFactories.add(new ClosestTrackingCaptureFilterGroupFactory()); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2011-11-19 00:57:07 UTC (rev 3571) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2011-11-29 03:45:09 UTC (rev 3572) @@ -151,23 +151,29 @@ String translatedNoQuery = "/" + translateRequestPath(httpRequest); // String absPath = getServletContext().getRealPath(contextRelativePath); String absPath = getServletContext().getRealPath(translatedNoQuery); - File test = new File(absPath); - if(test.exists()) { - - String translatedQ = "/" + translateRequestPathQuery(httpRequest); - - WaybackRequest wbRequest = new WaybackRequest(); -// wbRequest.setContextPrefix(getUrlRoot()); - wbRequest.setAccessPoint(this); - wbRequest.fixup(httpRequest); - UIResults uiResults = new UIResults(wbRequest,uriConverter); - try { - uiResults.forward(httpRequest, httpResponse, translatedQ); - return true; - } catch(IOException e) { - // TODO: figure out if we got IO because of a missing dispatcher + + //IK: added null check for absPath, it may be null (ex. 
on jetty) + if (absPath != null) { + File test = new File(absPath); + if((test != null) && !test.exists()) { + return false; } } + + String translatedQ = "/" + translateRequestPathQuery(httpRequest); + + WaybackRequest wbRequest = new WaybackRequest(); +// wbRequest.setContextPrefix(getUrlRoot()); + wbRequest.setAccessPoint(this); + wbRequest.fixup(httpRequest); + UIResults uiResults = new UIResults(wbRequest,uriConverter); + try { + uiResults.forward(httpRequest, httpResponse, translatedQ); + return true; + } catch(IOException e) { + // TODO: figure out if we got IO because of a missing dispatcher + } + return false; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2012-02-18 07:07:13
|
Revision: 3614 http://archive-access.svn.sourceforge.net/archive-access/?rev=3614&view=rev Author: ikreymer Date: 2012-02-18 07:07:06 +0000 (Sat, 18 Feb 2012) Log Message: ----------- FEATURE: Provide ability to set access point path separately from the Spring bean name. This is really useful for interpolation, as Spring bean names are not interpolated as properties. Allows for the following setup: <bean name="webAccessPoint" class="org.archive.wayback.webapp.AccessPoint"> <property name="accessPointPath" value="${wayback.host}:${wayback.port}:wayback"/> ... </bean> Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2012-02-16 19:07:40 UTC (rev 3613) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2012-02-18 07:07:06 UTC (rev 3614) @@ -23,6 +23,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.archive.wayback.webapp.AccessPoint; + /** * Helper static methods to implement registration of a RequestHandler with a * RequestMapper, based on the beanName() method.
@@ -128,7 +130,17 @@ */ public static void registerHandler(RequestHandler handler, RequestMapper mapper) { - String name = handler.getBeanName(); + + String name = null; + + if (handler instanceof AccessPoint) { + name = ((AccessPoint)handler).getAccessPointName(); + } + + if (name == null) { + name = handler.getBeanName(); + } + if(name != null) { if(name.equals(RequestMapper.GLOBAL_PRE_REQUEST_HANDLER)) { LOGGER.info("Registering Global-pre request handler:" + Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-16 19:07:40 UTC (rev 3613) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-18 07:07:06 UTC (rev 3614) @@ -132,6 +132,8 @@ private BooleanOperator<WaybackRequest> authentication = null; private long embargoMS = 0; private CustomResultFilterFactory filterFactory = null; + + private String accessPointPath = null; public void init() { checkAccessPointAware(collection,exception,query,parser,replay, @@ -958,4 +960,12 @@ public CustomResultFilterFactory getFilterFactory() { return filterFactory; } + + public String getAccessPointPath() { + return accessPointPath; + } + + public void setAccessPointPath(String accessPointPath) { + this.accessPointPath = accessPointPath; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2012-02-21 04:05:45
|
Revision: 3618 http://archive-access.svn.sourceforge.net/archive-access/?rev=3618&view=rev Author: ikreymer Date: 2012-02-21 04:05:39 +0000 (Tue, 21 Feb 2012) Log Message: ----------- FEATURE: Add shutdown() method to ExclusionFilter, allowing for a filter to perform cleanup (such as closing resources) after a request is complete, not just after every capture check. The shutdown() method is optional and is a no-op by default, except in CompositeExclusionFilter which propagates the shutdown to its members Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2012-02-21 01:39:14 UTC (rev 3617) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2012-02-21 04:05:39 UTC (rev 3618) @@ -30,9 +30,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.archive.util.ArchiveUtils; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.LiveDocumentNotAvailableException; import
org.archive.wayback.exception.LiveWebCacheUnavailableException; import org.archive.wayback.exception.LiveWebTimeoutException; @@ -97,7 +96,7 @@ sb = new StringBuilder(100); } - private String hostToRobotUrlString(String host) { + protected String hostToRobotUrlString(String host) { sb.setLength(0); sb.append(HTTP_PREFIX).append(host).append(ROBOT_SUFFIX); String robotUrl = sb.toString(); @@ -177,15 +176,15 @@ rulesCache.put(firstUrlString, rules); } } else { + long start = System.currentTimeMillis();; try { LOGGER.fine("ROBOT: NotCached - Downloading("+urlString+")"); tmpRules = new RobotRules(); - long start = System.currentTimeMillis(); Resource resource = webCache.getCachedResource(new URL(urlString), maxCacheMS,true); - long elapsed = System.currentTimeMillis() - start; - PerformanceLogger.noteElapsed("RobotRequest", elapsed, urlString); + //long elapsed = System.currentTimeMillis() - start; + //PerformanceLogger.noteElapsed("RobotRequest", elapsed, urlString); if(resource.getStatusCode() != 200) { LOGGER.info("ROBOT: NotAvailable("+urlString+")"); @@ -214,6 +213,9 @@ LOGGER.severe("ROBOT: LiveDocumentTimedOutException("+urlString+")"); filterGroup.setRobotTimedOut(); return null; + } finally { + long elapsed = System.currentTimeMillis() - start; + PerformanceLogger.noteElapsed("RobotRequest", elapsed, urlString); } } } @@ -269,4 +271,8 @@ } return filterResult; } + + public LiveWebCache getWebCache() { + return webCache; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2012-02-21 01:39:14 UTC (rev 3617) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2012-02-21 
04:05:39 UTC (rev 3618) @@ -69,4 +69,12 @@ } return FILTER_INCLUDE; } + + @Override + public void shutdown() + { + for (ExclusionFilter filter : filters) { + filter.shutdown(); + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java 2012-02-21 01:39:14 UTC (rev 3617) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java 2012-02-21 04:05:39 UTC (rev 3618) @@ -32,4 +32,9 @@ public void setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { this.filterGroup = filterGroup; } + + public void shutdown() + { + + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-21 01:39:14 UTC (rev 3617) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-21 04:05:39 UTC (rev 3618) @@ -192,6 +192,7 @@ WaybackRequest wbRequest = null; boolean handled = false; + ExclusionFilter exclusionFilter = null; try { String inputPath = translateRequestPathQuery(httpRequest); @@ -219,7 +220,7 @@ } if(getExclusionFactory() != null) { - ExclusionFilter exclusionFilter = + exclusionFilter = getExclusionFactory().get(); if(exclusionFilter == null) { throw new AdministrativeAccessControlException( @@ -279,7 +280,12 @@ getException().renderException(httpRequest, httpResponse, wbRequest, e, getUriConverter()); } + } finally { + if (exclusionFilter != null) { + exclusionFilter.shutdown(); + } } + return handled; } This 
was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2012-02-21 20:51:14
|
Revision: 3619 http://archive-access.svn.sourceforge.net/archive-access/?rev=3619&view=rev Author: ikreymer Date: 2012-02-21 20:51:08 +0000 (Tue, 21 Feb 2012) Log Message: ----------- FIX: Moving accessPointPath from AccessPoint to AbstractRequestHandler to enable setting custom accessPointPath for other handlers (such as LiveWebAccessPoint) Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/AbstractRequestHandler.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/AbstractRequestHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/AbstractRequestHandler.java 2012-02-21 04:05:39 UTC (rev 3618) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/AbstractRequestHandler.java 2012-02-21 20:51:08 UTC (rev 3619) @@ -37,6 +37,7 @@ */ public abstract class AbstractRequestHandler implements RequestHandler { private String beanName = null; + private String accessPointPath = null; private ServletContext servletContext = null; public void setBeanName(final String beanName) { @@ -45,7 +46,15 @@ public String getBeanName() { return beanName; } + + public String getAccessPointPath() { + return accessPointPath; + } + public void setAccessPointPath(String accessPointPath) { + this.accessPointPath = accessPointPath; + } + public void setServletContext(ServletContext servletContext) { this.servletContext = servletContext; } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2012-02-21 04:05:39 UTC (rev 3618) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2012-02-21 20:51:08 UTC (rev 3619) @@ -23,8 +23,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.archive.wayback.webapp.AccessPoint; - /** * Helper static methods to implement registration of a RequestHandler with a * RequestMapper, based on the beanName() method. @@ -133,8 +131,8 @@ String name = null; - if (handler instanceof AccessPoint) { - name = ((AccessPoint)handler).getAccessPointPath(); + if (handler instanceof AbstractRequestHandler) { + name = ((AbstractRequestHandler)handler).getAccessPointPath(); } if (name == null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-21 04:05:39 UTC (rev 3618) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-21 20:51:08 UTC (rev 3619) @@ -132,8 +132,6 @@ private BooleanOperator<WaybackRequest> authentication = null; private long embargoMS = 0; private CustomResultFilterFactory filterFactory = null; - - private String accessPointPath = null; public void init() { checkAccessPointAware(collection,exception,query,parser,replay, @@ -966,12 +964,4 @@ public CustomResultFilterFactory getFilterFactory() { return filterFactory; } - - public String getAccessPointPath() { - return accessPointPath; - } - - public 
void setAccessPointPath(String accessPointPath) { - this.accessPointPath = accessPointPath; - } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2012-02-28 04:07:03
|
Revision: 3620 http://archive-access.svn.sourceforge.net/archive-access/?rev=3620&view=rev Author: ikreymer Date: 2012-02-28 04:06:57 +0000 (Tue, 28 Feb 2012) Log Message: ----------- REMOVE: Remove shutdown() from ExclusionFilter; it was experimental (added a few builds ago) and proved unnecessary Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2012-02-21 20:51:08 UTC (rev 3619) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2012-02-28 04:06:57 UTC (rev 3620) @@ -69,12 +69,4 @@ } return FILTER_INCLUDE; } - - @Override - public void shutdown() - { - for (ExclusionFilter filter : filters) { - filter.shutdown(); - } - } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java 2012-02-21 20:51:08 UTC (rev 3619) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/ExclusionFilter.java 2012-02-28 04:06:57 UTC (rev 3620) @@ -32,9 +32,4 @@ public void
setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { this.filterGroup = filterGroup; } - - public void shutdown() - { - - } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-21 20:51:08 UTC (rev 3619) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2012-02-28 04:06:57 UTC (rev 3620) @@ -190,7 +190,6 @@ WaybackRequest wbRequest = null; boolean handled = false; - ExclusionFilter exclusionFilter = null; try { String inputPath = translateRequestPathQuery(httpRequest); @@ -218,7 +217,7 @@ } if(getExclusionFactory() != null) { - exclusionFilter = + ExclusionFilter exclusionFilter = getExclusionFactory().get(); if(exclusionFilter == null) { throw new AdministrativeAccessControlException( @@ -278,10 +277,6 @@ getException().renderException(httpRequest, httpResponse, wbRequest, e, getUriConverter()); } - } finally { - if (exclusionFilter != null) { - exclusionFilter.shutdown(); - } } return handled; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2012-02-28 04:09:52
|
Revision: 3621 http://archive-access.svn.sourceforge.net/archive-access/?rev=3621&view=rev Author: ikreymer Date: 2012-02-28 04:09:45 +0000 (Tue, 28 Feb 2012) Log Message: ----------- FEATURE: Add support for automatically decoding text files that contain a "Content-Encoding: gzip". This functionality is enabled by default for HTML, JS, and CSS replay renderers Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/GzipDecodingResource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2012-02-28 04:06:57 UTC (rev 3620) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2012-02-28 04:09:45 UTC (rev 3621) @@ -19,12 +19,10 @@ */ package org.archive.wayback.archivalurl; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; -import java.nio.charset.Charset; import java.util.Map; import javax.servlet.ServletException; @@ -46,14 +44,11 @@ import org.archive.wayback.replay.charset.CharsetDetector; import org.archive.wayback.replay.charset.StandardCharsetDetector; import 
org.archive.wayback.replay.html.ReplayParseContext; -import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.htmllex.ContextAwareLexer; import org.archive.wayback.util.htmllex.ParseEventHandler; import org.htmlparser.Node; -import org.htmlparser.lexer.InputStreamSource; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; -import org.htmlparser.lexer.Source; import org.htmlparser.util.ParserException; /** @@ -91,6 +86,8 @@ CaptureSearchResult result, Resource resource, ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, WaybackException { + + resource = TextReplayRenderer.decodeResource(resource); // The URL of the page, for resolving in-page relative URLs: URL url = null; Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/GzipDecodingResource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/GzipDecodingResource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/GzipDecodingResource.java 2012-02-28 04:09:45 UTC (rev 3621) @@ -0,0 +1,76 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * + * Provide a wrapper for a Resource that is gzip encoded, that is, + * Resources that have the header: + * Content-Type: gzip + * + * Used by TextReplayRenderers and other ReplayRenderers that add content to the resulting output + * + */ + +package org.archive.wayback.replay; + +import java.io.IOException; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +import org.archive.wayback.core.Resource; + +public class GzipDecodingResource extends Resource { + + public static final String GZIP = "gzip"; + + private Resource source; + + public GzipDecodingResource(Resource source) + { + this.source = source; + + try { + this.setInputStream(new GZIPInputStream(source)); + } catch (IOException io) { + // If can't read as gzip, might as well as send back raw data. + this.setInputStream(source); + } + } + + @Override + public long getRecordLength() { + return source.getRecordLength(); + } + + @Override + public Map<String, String> getHttpHeaders() { + return source.getHttpHeaders(); + } + + @Override + public void close() throws IOException { + source.close(); + } + + @Override + public int getStatusCode() { + return source.getStatusCode(); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/GzipDecodingResource.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java 2012-02-28 04:06:57 UTC (rev 3620) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java 2012-02-28 04:09:45 UTC (rev 3621) @@ -44,6 +44,7 @@ "Transfer-Encoding".toUpperCase(); public final static String HTTP_CHUNKED_ENCODING_HEADER = "chunked".toUpperCase(); + public final static String HTTP_CONTENT_ENCODING = "Content-Encoding"; /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2012-02-28 04:06:57 UTC (rev 3620) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2012-02-28 04:09:45 UTC (rev 3621) @@ -76,6 +76,9 @@ Map<String,String> headers = HttpHeaderOperation.processHeaders( resource, result, uriConverter, httpHeaderProcessor); + + // Decode resource (such as if gzip encoded) + resource = decodeResource(resource); String charSet = charsetDetector.getCharset(resource, wbRequest); // Load content into an HTML page, and resolve load-time URLs: @@ -150,4 +153,22 @@ public void setGuessedCharsetHeader(String guessedCharsetHeader) { this.guessedCharsetHeader = guessedCharsetHeader; } + + public static Resource decodeResource(Resource resource) throws IOException + { + Map<String, String> headers = resource.getHttpHeaders(); + + if (headers != null) { + String encoding = headers.get(HttpHeaderOperation.HTTP_CONTENT_ENCODING); + if (encoding != null) { + if (encoding.toLowerCase().equals(GzipDecodingResource.GZIP)) { + return new GzipDecodingResource(resource); + } + + //TODO: check for other encodings? + } + } + + return resource; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |