You can subscribe to this list here.
| 2005 | Jan | Feb | Mar | Apr | May | Jun | Jul (1) | Aug (10) | Sep (36) | Oct (339) | Nov (103) | Dec (152) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2006 | Jan (141) | Feb (102) | Mar (125) | Apr (203) | May (57) | Jun (30) | Jul (139) | Aug (46) | Sep (64) | Oct (105) | Nov (34) | Dec (162) |
| 2007 | Jan (81) | Feb (57) | Mar (141) | Apr (72) | May (9) | Jun (1) | Jul (144) | Aug (88) | Sep (40) | Oct (43) | Nov (34) | Dec (20) |
| 2008 | Jan (44) | Feb (45) | Mar (16) | Apr (36) | May (8) | Jun (77) | Jul (177) | Aug (66) | Sep (8) | Oct (33) | Nov (13) | Dec (37) |
| 2009 | Jan (2) | Feb (5) | Mar (8) | Apr | May (36) | Jun (19) | Jul (46) | Aug (8) | Sep (1) | Oct (66) | Nov (61) | Dec (10) |
| 2010 | Jan (13) | Feb (16) | Mar (38) | Apr (76) | May (47) | Jun (32) | Jul (35) | Aug (45) | Sep (20) | Oct (61) | Nov (24) | Dec (16) |
| 2011 | Jan (22) | Feb (34) | Mar (11) | Apr (8) | May (24) | Jun (23) | Jul (11) | Aug (42) | Sep (81) | Oct (48) | Nov (21) | Dec (20) |
| 2012 | Jan (30) | Feb (25) | Mar (4) | Apr (6) | May (1) | Jun (5) | Jul (5) | Aug (8) | Sep (6) | Oct (6) | Nov | Dec |
From: <bra...@us...> - 2008-07-01 23:55:36
|
Revision: 2380 http://archive-access.svn.sourceforge.net/archive-access/?rev=2380&view=rev Author: bradtofel Date: 2008-07-01 16:55:46 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Also now uses UIReplayResult object to forward context to .jsps Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2008-07-01 23:54:14 UTC (rev 2379) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2008-07-01 23:55:46 UTC (rev 2380) @@ -40,10 +40,10 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.query.UIQueryResults; +import org.archive.wayback.replay.UIReplayResult; import org.mozilla.universalchardet.UniversalDetector; /** @@ -68,7 +68,7 @@ private final static int C_BUFFER_SIZE = 4096; private Resource resource = null; - private SearchResult result = null; + private CaptureSearchResult result = null; private ResultURIConverter uriConverter = null; /** * the internal StringBuilder @@ -82,7 +82,7 @@ * @param result * @param uriConverter */ - public HTMLPage(Resource resource, SearchResult result, + public HTMLPage(Resource resource, CaptureSearchResult result, ResultURIConverter uriConverter) { this.resource = resource; this.result = result; @@ -234,8 +234,8 @@ public void 
resolvePageUrls() { // TODO: get url from Resource instead of SearchResult? - String pageUrl = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); + String pageUrl = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); String existingBaseHref = TagMagix.getBaseHref(sb); if (existingBaseHref == null) { @@ -268,8 +268,8 @@ public void resolveAllPageUrls() { // TODO: get url from Resource instead of SearchResult? - String pageUrl = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); + String pageUrl = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); String existingBaseHref = TagMagix.getBaseHref(sb); if (existingBaseHref != null) { @@ -304,16 +304,16 @@ public void resolveCSSUrls() { // TODO: get url from Resource instead of SearchResult? - String pageUrl = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); + String pageUrl = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); TagMagix.markupCSSImports(sb,uriConverter, captureDate, pageUrl); } public void resolveASXRefUrls() { // TODO: get url from Resource instead of SearchResult? 
- String pageUrl = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); + String pageUrl = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); ResultURIConverter ruc = new MMSToHTTPResultURIConverter(uriConverter); TagMagix.markupTagREURIC(sb, ruc, captureDate, pageUrl, @@ -436,12 +436,12 @@ */ public String includeJspString(String jspPath, HttpServletRequest httpRequest, HttpServletResponse httpResponse, - WaybackRequest wbRequest, SearchResults results, SearchResult result) + WaybackRequest wbRequest, CaptureSearchResults results, + CaptureSearchResult result, Resource resource) throws ServletException, IOException { - UIQueryResults uiResults = new UIQueryResults(httpRequest, wbRequest, - results, uriConverter); - uiResults.setResult(result); + UIReplayResult uiResults = new UIReplayResult(httpRequest, wbRequest, + result, results, resource, uriConverter); StringHttpServletResponseWrapper wrappedResponse = new StringHttpServletResponseWrapper(httpResponse); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:54:05
|
Revision: 2379 http://archive-access.svn.sourceforge.net/archive-access/?rev=2379&view=rev Author: bradtofel Date: 2008-07-01 16:54:14 -0700 (Tue, 01 Jul 2008) Log Message: ----------- INTERMEDIATE REFACTOR: split this class out into more classes, but hopefully this will soon be followed by a removal of most of this code, as the underlying *SearchResult*, WaybackRequest and Resource objects get better accessors. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UICaptureQueryResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIUrlQueryResults.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2008-07-01 23:51:43 UTC (rev 2378) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2008-07-01 23:54:14 UTC (rev 2379) @@ -28,6 +28,9 @@ import javax.servlet.http.HttpServletRequest; +import org.apache.commons.httpclient.URIException; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.WaybackConstants; import org.archive.wayback.util.StringFormatter; import org.archive.wayback.webapp.AccessPoint; @@ -39,7 +42,8 @@ */ public class UIResults { private final static String FERRET_NAME = "ui-results"; - protected WaybackRequest wbRequest; + private WaybackRequest wbRequest; + private 
ResultURIConverter uriConverter; private String contentJsp = null; private String originalRequestURL = null; @@ -47,9 +51,10 @@ /** * @param wbRequest Wayback Request argument */ - public UIResults(WaybackRequest wbRequest) { + public UIResults(WaybackRequest wbRequest,ResultURIConverter uriConverter) { super(); this.wbRequest = wbRequest; + this.uriConverter = uriConverter; } /** * @return Returns the wbRequest. @@ -60,6 +65,25 @@ } return wbRequest; } + + /** + * @param url + * @return String url that will make a query for all captures of an URL. + */ + public String makeCaptureQueryUrl(String url) { + WaybackRequest newWBR = wbRequest.clone(); + + newWBR.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + try { + newWBR.setRequestUrl(url); + } catch (URIException e) { + // should not happen... + e.printStackTrace(); + } + return newWBR.getContextPrefix() + "query?" + + newWBR.getQueryArguments(1); + } /** * @return StringFormatter localized to user request @@ -100,7 +124,7 @@ public static UIResults getGeneric(HttpServletRequest httpRequest) { WaybackRequest wbRequest = new WaybackRequest(); wbRequest.fixup(httpRequest); - return new UIResults(wbRequest); + return new UIResults(wbRequest, null); } private static void replaceAll(StringBuffer s, final String o, final String n) { @@ -203,5 +227,35 @@ public String getOriginalRequestURL() { return originalRequestURL; } + /** + * @param result + * @return URL string that will replay the specified Resource Result. 
+ */ + public String resultToReplayUrl(CaptureSearchResult result) { + if(uriConverter == null) { + return null; + } + String url = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); + return uriConverter.makeReplayURI(captureDate,url); + } + + /** + * @return the ResultURIConverter + */ + public ResultURIConverter getURIConverter() { + return uriConverter; + } + /** + * @param url + * @param timestamp + * @return String url that will replay the url at timestamp + */ + public String makeReplayUrl(String url, String timestamp) { + if(uriConverter == null) { + return null; + } + return uriConverter.makeReplayURI(timestamp, url); + } } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UICaptureQueryResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UICaptureQueryResults.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UICaptureQueryResults.java 2008-07-01 23:54:14 UTC (rev 2379) @@ -0,0 +1,99 @@ +/* UICaptureQueryResults + * + * $Id$ + * + * Created on 6:14:06 PM Jun 27, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.query; + +import java.util.Iterator; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.WaybackRequest; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class UICaptureQueryResults extends UIQueryResults { + + private CaptureSearchResults results; + private Timestamp firstResultTimestamp; + + private Timestamp lastResultTimestamp; + + /** + * Constructor -- chew search result summaries into format easier for JSPs + * to digest. + * + * @param httpRequest + * @param wbRequest + * @param results + * @param uriConverter + */ + public UICaptureQueryResults(HttpServletRequest httpRequest, + WaybackRequest wbRequest, CaptureSearchResults results, + ResultURIConverter uriConverter) { + super(httpRequest, wbRequest, results, uriConverter); + + this.firstResultTimestamp = Timestamp.parseBefore(results + .getFirstResultTimestamp()); + this.lastResultTimestamp = Timestamp.parseBefore(results + .getLastResultTimestamp()); + + this.results = results; + } + + /** + * @return first Timestamp in returned ResourceResults + */ + public Timestamp getFirstResultTimestamp() { + return firstResultTimestamp; + } + + /** + * @return last Timestamp in returned ResourceResults + */ + public Timestamp getLastResultTimestamp() { + return lastResultTimestamp; + } + + /** + * @return Iterator of CaptureSearchResult + */ + public Iterator<CaptureSearchResult> resultsIterator() { + return results.iterator(); + } + + /** + * @return Returns the results. 
+ */ + public CaptureSearchResults getResults() { + return results; + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java 2008-07-01 23:51:43 UTC (rev 2378) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java 2008-07-01 23:54:14 UTC (rev 2379) @@ -25,19 +25,15 @@ package org.archive.wayback.query; import java.util.Date; -import java.util.Iterator; import javax.servlet.http.HttpServletRequest; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.WaybackConstants; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.UIResults; -import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; /** @@ -58,24 +54,18 @@ private Timestamp endTimestamp; - private Timestamp firstResultTimestamp; - - private Timestamp lastResultTimestamp; - private Timestamp exactRequestedTimestamp; - private int resultsReturned; - private int resultsMatching; - private int resultsPerPage; - private int firstResult; - private int lastResult; + private long resultsReturned; + private long resultsMatching; + private long resultsPerPage; + private long firstResult; + private long lastResult; private int numPages; private int curPage; - private SearchResults results; - private SearchResult result; - private ResultURIConverter uriConverter; + private CaptureSearchResult result; /** * Constructor -- chew search result summaries into format easier for JSPs @@ 
-89,26 +79,17 @@ public UIQueryResults(HttpServletRequest httpRequest, WaybackRequest wbRequest, SearchResults results, ResultURIConverter uriConverter) { - super(wbRequest); - this.searchUrl = wbRequest.get(WaybackConstants.RESULT_URL); + super(wbRequest,uriConverter); + this.searchUrl = wbRequest.get(WaybackConstants.REQUEST_URL); this.startTimestamp = Timestamp.parseBefore(results. getFilter(WaybackConstants.REQUEST_START_DATE)); this.endTimestamp = Timestamp.parseAfter(results.getFilter( WaybackConstants.REQUEST_END_DATE)); - this.firstResultTimestamp = Timestamp.parseBefore(results - .getFirstResultDate()); - this.lastResultTimestamp = Timestamp.parseBefore(results - .getLastResultDate()); - - this.resultsReturned = Integer.parseInt(results.getFilter( - WaybackConstants.RESULTS_NUM_RETURNED)); - this.resultsMatching = Integer.parseInt(results.getFilter( - WaybackConstants.RESULTS_NUM_RESULTS)); - this.resultsPerPage = Integer.parseInt(results.getFilter( - WaybackConstants.RESULTS_REQUESTED)); - this.firstResult = Integer.parseInt(results.getFilter( - WaybackConstants.RESULTS_FIRST_RETURNED)) + 1; + this.resultsReturned = results.getReturnedCount(); + this.resultsMatching = results.getMatchingCount(); + this.resultsPerPage = results.getNumRequested(); + this.firstResult = results.getFirstReturned() + 1; this.lastResult = ((firstResult - 1) + resultsReturned); this.exactRequestedTimestamp = Timestamp.parseAfter( wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE)); @@ -116,27 +97,9 @@ numPages = (int) Math.ceil((double)resultsMatching/(double)resultsPerPage); curPage = (int) Math.floor(((double)(firstResult-1))/(double)resultsPerPage) + 1; - this.results = results; - this.uriConverter = uriConverter; } /** - * @return true if the underlying SearchResult objects contain Capture level - * data - */ - public boolean isCaptureResults() { - return (results instanceof CaptureSearchResults); - } - - /** - * @return true if the underlying SearchResult objects contain Url 
level - * data - */ - public boolean isUrlResults() { - return (results instanceof UrlSearchResults); - } - - /** * @return Timestamp end cutoff requested by user */ public Timestamp getEndTimestamp() { @@ -144,20 +107,6 @@ } /** - * @return first Timestamp in returned ResourceResults - */ - public Timestamp getFirstResultTimestamp() { - return firstResultTimestamp; - } - - /** - * @return last Timestamp in returned ResourceResults - */ - public Timestamp getLastResultTimestamp() { - return lastResultTimestamp; - } - - /** * @return URL or URL prefix requested by user */ public String getSearchUrl() { @@ -172,59 +121,7 @@ } /** - * @return Iterator of ResourceResults - */ - public Iterator<SearchResult> resultsIterator() { - return results.iterator(); - } - - /** - * @param result - * @return URL string that will replay the specified Resource Result. - */ - public String resultToReplayUrl(SearchResult result) { - String url = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); - return uriConverter.makeReplayURI(captureDate,url); - } - - /** - * @return the ResultURIConverter - */ - public ResultURIConverter getURIConverter() { - return uriConverter; - } - - /** - * @param url * @param timestamp - * @return String url that will replay the url at timestamp - */ - public String makeReplayUrl(String url, String timestamp) { - return uriConverter.makeReplayURI(timestamp, url); - } - - /** - * @param url - * @return String url that will make a query for all captures of an URL. - */ - public String makeCaptureQueryUrl(String url) { - WaybackRequest newWBR = wbRequest.clone(); - - newWBR.put(WaybackConstants.REQUEST_TYPE, - WaybackConstants.REQUEST_URL_QUERY); - try { - newWBR.setRequestUrl(url); - } catch (URIException e) { - // should not happen... - e.printStackTrace(); - } - return newWBR.getContextPrefix() + "query?" 
+ - newWBR.getQueryArguments(1); - } - - /** - * @param timestamp * @return Date for the timestamp string */ public Date timestampToDate(String timestamp) { @@ -235,37 +132,35 @@ * @param result * @return Date representing captureDate of SearchResult result */ - public Date resultToDate(SearchResult result) { - Timestamp t = new Timestamp(result.get( - WaybackConstants.RESULT_CAPTURE_DATE)); - return t.getDate(); + public Date resultToDate(CaptureSearchResult result) { + return result.getCaptureDate(); } /** * @return Returns the firstResult. */ - public int getFirstResult() { + public long getFirstResult() { return firstResult; } /** * @return Returns the resultsMatching. */ - public int getResultsMatching() { + public long getResultsMatching() { return resultsMatching; } /** * @return Returns the resultsPerPage. */ - public int getResultsPerPage() { + public long getResultsPerPage() { return resultsPerPage; } /** * @return Returns the resultsReturned. */ - public int getResultsReturned() { + public long getResultsReturned() { return resultsReturned; } @@ -289,6 +184,7 @@ * different page of results for the same query */ public String urlForPage(int pageNum) { + WaybackRequest wbRequest = getWbRequest(); return wbRequest.getContextPrefix() + "query?" + wbRequest.getQueryArguments(pageNum); } @@ -296,16 +192,9 @@ /** * @return Returns the lastResult. */ - public int getLastResult() { + public long getLastResult() { return lastResult; } - - /** - * @return Returns the results. - */ - public SearchResults getResults() { - return results; - } /** * @return Returns the exactRequestedTimestamp. 
@@ -314,11 +203,11 @@ return exactRequestedTimestamp; } - public SearchResult getResult() { + public CaptureSearchResult getResult() { return result; } - public void setResult(SearchResult result) { + public void setResult(CaptureSearchResult result) { this.result = result; } } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIUrlQueryResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIUrlQueryResults.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIUrlQueryResults.java 2008-07-01 23:54:14 UTC (rev 2379) @@ -0,0 +1,76 @@ +/* UIUrlQueryResults + * + * $Id$ + * + * Created on 6:01:39 PM Jun 27, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.query; + +import java.util.Iterator; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.UrlSearchResult; +import org.archive.wayback.core.UrlSearchResults; +import org.archive.wayback.core.WaybackRequest; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class UIUrlQueryResults extends UIQueryResults { + + private UrlSearchResults results; + + /** + * Constructor -- chew search result summaries into format easier for JSPs + * to digest. + * + * @param httpRequest + * @param wbRequest + * @param results + * @param uriConverter + */ + public UIUrlQueryResults(HttpServletRequest httpRequest, + WaybackRequest wbRequest, UrlSearchResults results, + ResultURIConverter uriConverter) { + super(httpRequest, wbRequest, results, uriConverter); + + this.results = results; + } + + /** + * @return Iterator of ResourceResults + */ + public Iterator<UrlSearchResult> resultsIterator() { + return results.iterator(); + } + + /** + * @return Returns the results. 
+ */ + public UrlSearchResults getResults() { + return results; + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java 2008-07-01 23:51:43 UTC (rev 2378) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/UIReplayResult.java 2008-07-01 23:54:14 UTC (rev 2379) @@ -30,8 +30,8 @@ import javax.servlet.http.HttpServletRequest; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.UIResults; @@ -45,10 +45,11 @@ */ public class UIReplayResult extends UIResults { - private HttpServletRequest httpRequest; - private SearchResult result; + private HttpServletRequest httpRequest; + private CaptureSearchResult result; + private CaptureSearchResults results; private Resource resource; - private ResultURIConverter uriConverter; + /** * Constructor -- chew search result summaries into format easier for JSPs @@ -57,20 +58,22 @@ * @param httpRequest * @param wbRequest * @param result + * @param results * @param resource * @param uriConverter * @throws IOException */ public UIReplayResult(HttpServletRequest httpRequest, - WaybackRequest wbRequest, SearchResult result, - Resource resource, ResultURIConverter uriConverter) + WaybackRequest wbRequest, CaptureSearchResult result, + CaptureSearchResults results, Resource resource, + ResultURIConverter uriConverter) throws IOException { - super(wbRequest); + super(wbRequest,uriConverter); this.httpRequest = httpRequest; this.result = 
result; + this.results = results; this.resource = resource; - this.uriConverter = uriConverter; } /** @@ -90,68 +93,62 @@ /** * @return Returns the result. */ - public SearchResult getResult() { + public CaptureSearchResult getResult() { return result; } /** - * @return Returns the uriConverter. - */ - public ResultURIConverter getUriConverter() { - return uriConverter; - } - - /** - * @return Returns the wbRequest. - */ - public WaybackRequest getWbRequest() { - return wbRequest; - } - - /** * @return the original URL, or at least as close as can be rebuilt from * the index info */ public String getOriginalUrl() { - return result.get(WaybackConstants.RESULT_URL); + return result.getOriginalUrl(); } /** * @return the MimeURL key from the index of the result */ public String getUrlKey() { - return result.get(WaybackConstants.RESULT_URL_KEY); + return result.getUrlKey(); } /** * @return a string offset+arc file name combo, which should uniquely * identify this document */ public String getArchiveID() { - return result.get(WaybackConstants.RESULT_OFFSET) + "/" + - result.get(WaybackConstants.RESULT_ARC_FILE); + return result.getOffset() + "/" + result.getFile(); } /** * @return the CaptureDate Timestamp of the result */ public Timestamp getCaptureTimestamp() { - return Timestamp.parseBefore( - result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + return Timestamp.parseBefore(result.getCaptureTimestamp()); } /** * @return the MimeType String of the result */ public String getMimeType() { - return result.get(WaybackConstants.RESULT_MIME_TYPE); + return result.getMimeType(); } + /** * @return the Digest string of the result */ public String getDigest() { - return result.get(WaybackConstants.RESULT_MD5_DIGEST); + return result.getDigest(); } + /** * @return the HTTP Headers as Properties */ public Map<String,String> getHttpHeaders() { return resource.getHttpHeaders(); } + + public CaptureSearchResults getResults() { + return results; + } + + public void 
setResults(CaptureSearchResults results) { + this.results = results; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2378 http://archive-access.svn.sourceforge.net/archive-access/?rev=2378&view=rev Author: bradtofel Date: 2008-07-01 16:51:43 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REIMPLEMENTATION: totally rewritten to (hopefully) make it more flexible understandable, and with less code duplication. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-07-01 23:51:43 UTC (rev 2378) @@ -33,7 +33,19 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.resourceindex.filters.CaptureToUrlResultFilter; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.CaptureToUrlSearchResultAdapter; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.UrlSearchResult; +import org.archive.wayback.core.UrlSearchResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.AccessControlException; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ResourceIndexNotAvailableException; +import org.archive.wayback.exception.ResourceNotInArchiveException; import 
org.archive.wayback.resourceindex.filters.CounterFilter; import org.archive.wayback.resourceindex.filters.DateRangeFilter; import org.archive.wayback.resourceindex.filters.DuplicateRecordFilter; @@ -45,25 +57,16 @@ import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.Timestamp; -import org.archive.wayback.core.UrlSearchResults; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.AccessControlException; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.ResourceIndexNotAvailableException; -import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; +import org.archive.wayback.util.ObjectFilterIterator; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** - * - * + * + * * @author brad * @version $Date$, $Revision$ */ @@ -82,37 +85,13 @@ private boolean dedupeRecords = false; - private ObjectFilter<SearchResult> annotater = null; + private ObjectFilter<CaptureSearchResult> annotater = null; public LocalResourceIndex() { canonicalizer = new AggressiveUrlCanonicalizer(); } - - private void filterRecords(Iterator<SearchResult> itr, - ObjectFilter<SearchResult> filter, SearchResults results, - boolean forwards) throws IOException { - if(dedupeRecords) { - itr = new AdaptedIterator<SearchResult, SearchResult>(itr, - new DeduplicationSearchResultAnnotationAdapter()); - } - while (itr.hasNext()) { - SearchResult result = itr.next(); - int ruling = 
filter.filterObject(result); - if (ruling == ObjectFilter.FILTER_ABORT) { - break; - } else if (ruling == ObjectFilter.FILTER_INCLUDE) { - results.addSearchResult(result, forwards); - } - } - if(itr instanceof CloseableIterator) { - CloseableIterator<SearchResult> citr = - (CloseableIterator<SearchResult>) itr; - source.cleanup(citr); - } - } - - private String getRequired(WaybackRequest wbRequest, String field, + private static String getRequired(WaybackRequest wbRequest, String field, String defaultValue) throws BadQueryException { String value = wbRequest.get(field); @@ -126,34 +105,108 @@ return value; } - private String getRequired(WaybackRequest wbRequest, String field) + private static String getRequired(WaybackRequest wbRequest, String field) throws BadQueryException { return getRequired(wbRequest, field, null); } - private HostMatchFilter getExactHostFilter(WaybackRequest wbRequest) { + private CloseableIterator<CaptureSearchResult> getCaptureIterator(String k) + throws ResourceIndexNotAvailableException { - HostMatchFilter filter = null; - String exactHostFlag = wbRequest.get( - WaybackConstants.REQUEST_EXACT_HOST_ONLY); - if(exactHostFlag != null && - exactHostFlag.equals(WaybackConstants.REQUEST_YES)) { + CloseableIterator<CaptureSearchResult> captures = + source.getPrefixIterator(k); + if(dedupeRecords) { + captures = new AdaptedIterator<CaptureSearchResult, CaptureSearchResult> + (captures, new DeduplicationSearchResultAnnotationAdapter()); + } + return captures; + } + private void cleanupIterator(CloseableIterator<? 
extends SearchResult> itr) + throws ResourceIndexNotAvailableException { + try { + itr.close(); + } catch (IOException e) { + e.printStackTrace(); + throw new ResourceIndexNotAvailableException( + e.getLocalizedMessage()); + } + } + + public CaptureSearchResults doCaptureQuery(WaybackRequest wbRequest, + int type) throws ResourceIndexNotAvailableException, + ResourceNotInArchiveException, BadQueryException, + AccessControlException { + + CaptureSearchResults results = new CaptureSearchResults(); - String searchUrl = wbRequest.get(WaybackConstants.REQUEST_URL); - try { + CaptureQueryFilterState filterState = + new CaptureQueryFilterState(wbRequest,canonicalizer, type); + String keyUrl = filterState.getKeyUrl(); - UURI searchURI = UURIFactory.getInstance(searchUrl); - String exactHost = searchURI.getHost(); - filter = new HostMatchFilter(exactHost); + CloseableIterator<CaptureSearchResult> itr = getCaptureIterator(keyUrl); + // set up the common Filters: + ObjectFilter<CaptureSearchResult> filter = filterState.getFilter(); + itr = new ObjectFilterIterator<CaptureSearchResult>(itr,filter); + + // Windowing: + WindowFilterState<CaptureSearchResult> window = + new WindowFilterState<CaptureSearchResult>(wbRequest); + ObjectFilter<CaptureSearchResult> windowFilter = window.getFilter(); + itr = new ObjectFilterIterator<CaptureSearchResult>(itr,windowFilter); + + + if(annotater != null) { + itr = new ObjectFilterIterator<CaptureSearchResult>(itr,annotater); + } + + while(itr.hasNext()) { + results.addSearchResult(itr.next()); + } + + filterState.annotateResults(results); + window.annotateResults(results); + cleanupIterator(itr); + return results; + } + public UrlSearchResults doUrlQuery(WaybackRequest wbRequest) + throws ResourceIndexNotAvailableException, + ResourceNotInArchiveException, BadQueryException, + AccessControlException { + + UrlSearchResults results = new UrlSearchResults(); - } catch (URIException e) { - // Really, this isn't gonna happen, we've already 
canonicalized - // it... should really optimize and do that just once. - e.printStackTrace(); - } + CaptureQueryFilterState filterState = + new CaptureQueryFilterState(wbRequest,canonicalizer, + CaptureQueryFilterState.TYPE_URL); + String keyUrl = filterState.getKeyUrl(); + + CloseableIterator<CaptureSearchResult> citr = getCaptureIterator(keyUrl); + // set up the common Filters: + ObjectFilter<CaptureSearchResult> filter = filterState.getFilter(); + citr = new ObjectFilterIterator<CaptureSearchResult>(citr,filter); + + // adapt into UrlSearchResult: + + CloseableIterator<UrlSearchResult> itr = + CaptureToUrlSearchResultAdapter.adaptCaptureIterator(citr); + + // Windowing: + WindowFilterState<UrlSearchResult> window = + new WindowFilterState<UrlSearchResult>(wbRequest); + ObjectFilter<UrlSearchResult> windowFilter = window.getFilter(); + itr = new ObjectFilterIterator<UrlSearchResult>(itr,windowFilter); + + while(itr.hasNext()) { + results.addSearchResult(itr.next()); } - return filter; + + filterState.annotateResults(results); + window.annotateResults(results); + cleanupIterator(itr); + + return results; } + /* * (non-Javadoc) * @@ -163,289 +216,38 @@ throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { - SearchResults results = null; // return value placeholder - - String startKey; // actual key where search will begin - String keyUrl; // "purified" URL request - int startResult; // calculated based on hits/page * pagenum - - // first grab all the info from the WaybackRequest, and validate it: - - int resultsPerPage = wbRequest.getResultsPerPage(); - int pageNum = wbRequest.getPageNum(); - startResult = (pageNum - 1) * resultsPerPage; - - if (resultsPerPage < 1) { - throw new BadQueryException("resultsPerPage cannot be < 1"); - } - if (resultsPerPage > maxRecords) { - throw new BadQueryException("resultsPerPage cannot be > " - + maxRecords); - } - if (pageNum < 1) { - throw new 
BadQueryException("pageNum must be > 0"); - } - - String searchUrl = getRequired(wbRequest, WaybackConstants.REQUEST_URL); String searchType = getRequired(wbRequest, WaybackConstants.REQUEST_TYPE); - String startDate = getRequired(wbRequest, - WaybackConstants.REQUEST_START_DATE, Timestamp - .earliestTimestamp().getDateStr()); - String endDate = getRequired(wbRequest, - WaybackConstants.REQUEST_END_DATE, Timestamp.latestTimestamp() - .getDateStr()); - String exactDate = getRequired(wbRequest, - WaybackConstants.REQUEST_EXACT_DATE, Timestamp - .latestTimestamp().getDateStr()); - try { - keyUrl = canonicalizer.urlStringToKey(searchUrl); - } catch (URIException e) { - throw new BadQueryException("invalid " - + WaybackConstants.REQUEST_URL + " " + searchUrl); - } - - // set up the common Filters: - - // makes sure we don't inspect too many records: prevents DOS - GuardRailFilter guardrail = new GuardRailFilter(maxRecords); - - // checks an exclusion service for every matching record - ObjectFilter<SearchResult> exclusion = wbRequest.getExclusionFilter(); - - // count how many results got to the ExclusionFilter: - CounterFilter preExCounter = new CounterFilter(); - // count how many results got past the ExclusionFilter, or how - // many total matched, if there was no ExclusionFilter: - CounterFilter finalCounter = new CounterFilter(); - - // has the user asked for only results on the exact host specified? 
- HostMatchFilter hostMatchFilter = getExactHostFilter(wbRequest); - if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { - results = new CaptureSearchResults(); + results = doCaptureQuery(wbRequest, + CaptureQueryFilterState.TYPE_REPLAY); - ObjectFilterChain<SearchResult> forwardFilters = - new ObjectFilterChain<SearchResult>(); - -// ObjectFilterChain<SearchResult> reverseFilters = -// new ObjectFilterChain<SearchResult>(); - - // use the same guardrail for both: - forwardFilters.addFilter(guardrail); -// reverseFilters.addFilter(guardrail); - - forwardFilters.addFilter(new DuplicateRecordFilter()); - - // match URL key: - forwardFilters.addFilter(new UrlMatchFilter(keyUrl)); -// reverseFilters.addFilter(new UrlMatchFilter(keyUrl)); - - if(hostMatchFilter != null) { - forwardFilters.addFilter(hostMatchFilter); -// reverseFilters.addFilter(hostMatchFilter); - } - - // be sure to only include records within the date range we want: - // The bin search may start the forward filters at a record older - // than we want. Since the fowardFilters only include an abort - // endDateFilter, we might otherwise include a record before the - // requested range. - DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate); - forwardFilters.addFilter(drFilter); -// reverseFilters.addFilter(drFilter); - - // abort processing if we hit a date outside the search range: - forwardFilters.addFilter(new EndDateFilter(endDate)); -// reverseFilters.addFilter(new StartDateFilter(startDate)); - - // for replay, do not include records that redirect to - // themselves.. We'll leave this for both closest and replays, - // because the only application of closest at the moment is - // timeline in which case, we don't want to show captures that - // redirect to themselves in the timeline if they are not viewable. 
- SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter(); - selfRedirectFilter.setCanonicalizer(canonicalizer); - forwardFilters.addFilter(selfRedirectFilter); -// reverseFilters.addFilter(selfRedirectFilter); - - // possibly filter via exclusions: - if(exclusion != null) { - forwardFilters.addFilter(preExCounter); - forwardFilters.addFilter(exclusion); - -// reverseFilters.addFilter(preExCounter); -// reverseFilters.addFilter(exclusion); - } - forwardFilters.addFilter(finalCounter); -// reverseFilters.addFilter(finalCounter); - - forwardFilters.addFilter(new WindowEndFilter(resultsPerPage)); -// int resultsPerDirection = (int) Math.floor(resultsPerPage / 2); -// reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection)); - if(annotater != null) { - forwardFilters.addFilter(annotater); - } - startKey = keyUrl; - - try { -// CloseableIterator<SearchResult> reverse = -// new AdaptedObjectFilterIterator<SearchResult>( -// source.getPrefixReverseIterator(startKey), -// reverseFilters); - -// // reverse the reverseResults: -// ArrayList<SearchResult> reverseResults = -// new ArrayList<SearchResult>(); -// while(reverse.hasNext()) { -// reverseResults.add(0, reverse.next()); -// } - - // now make a composite of the reverse and forwards: - - CloseableIterator<SearchResult> forward = - source.getPrefixIterator(startKey); -// -// CompositeIterator<SearchResult> resultsItr = -// new CompositeIterator<SearchResult>(); -// resultsItr.addComponent(reverseResults.iterator()); -// resultsItr.addComponent(forward); - - // and filter: -// filterRecords(resultsItr, forwardFilters, results, true); - filterRecords(forward, forwardFilters, results, true); - - } catch (IOException e) { - throw new ResourceIndexNotAvailableException( - e.getLocalizedMessage()); - } - } else if (searchType.equals(WaybackConstants.REQUEST_URL_QUERY)) { - results = new CaptureSearchResults(); - // build up the FilterChain(s): - ObjectFilterChain<SearchResult> filters = - new 
ObjectFilterChain<SearchResult>(); - filters.addFilter(guardrail); - filters.addFilter(new DuplicateRecordFilter()); + results = doCaptureQuery(wbRequest, + CaptureQueryFilterState.TYPE_CAPTURE); - filters.addFilter(new UrlMatchFilter(keyUrl)); - if(hostMatchFilter != null) { - filters.addFilter(hostMatchFilter); - } - filters.addFilter(new EndDateFilter(endDate)); - // possibly filter via exclusions: - if (exclusion != null) { - filters.addFilter(preExCounter); - filters.addFilter(exclusion); - } - filters.addFilter(finalCounter); - // OPTIMIZ: beginning the search at the startDate causes problems - // with deduplicated results. We need to be smarter about rolling - // backwards a ways if we start on a deduped record. -// startKey = keyUrl + " " + startDate; - startKey = keyUrl + " "; - - // add the start and end windowing filters: - filters.addFilter(new WindowStartFilter(startResult)); - filters.addFilter(new WindowEndFilter(resultsPerPage)); - if(annotater != null) { - filters.addFilter(annotater); - } - try { - filterRecords(source.getPrefixIterator(startKey), filters, results, - true); - } catch (IOException e) { - throw new ResourceIndexNotAvailableException( - e.getLocalizedMessage()); - } - - } else if (searchType.equals(WaybackConstants.REQUEST_URL_PREFIX_QUERY)) { - results = new UrlSearchResults(); - // build up the FilterChain(s): - ObjectFilterChain<SearchResult> filters = - new ObjectFilterChain<SearchResult>(); - filters.addFilter(guardrail); - filters.addFilter(new DuplicateRecordFilter()); + results = doUrlQuery(wbRequest); - filters.addFilter(new UrlPrefixMatchFilter(keyUrl)); - if(hostMatchFilter != null) { - filters.addFilter(hostMatchFilter); - } - filters.addFilter(new DateRangeFilter(startDate, endDate)); - // possibly filter via exclusions: - if (exclusion != null) { - filters.addFilter(preExCounter); - filters.addFilter(exclusion); - } - filters.addFilter(new CaptureToUrlResultFilter()); - filters.addFilter(finalCounter); - startKey = 
keyUrl; - - // add the start and end windowing filters: - filters.addFilter(new WindowStartFilter(startResult)); - filters.addFilter(new WindowEndFilter(resultsPerPage)); - if(annotater != null) { - filters.addFilter(annotater); - } - try { - filterRecords(source.getPrefixIterator(startKey), filters, results, - true); - } catch (IOException e) { - throw new ResourceIndexNotAvailableException( - e.getLocalizedMessage()); - } - } else { + throw new BadQueryException("Unknown query type(" + searchType + "), must be " + WaybackConstants.REQUEST_REPLAY_QUERY + ", " + WaybackConstants.REQUEST_CLOSEST_QUERY + ", " + WaybackConstants.REQUEST_URL_QUERY + ", or " + WaybackConstants.REQUEST_URL_PREFIX_QUERY); } - - int matched = finalCounter.getNumMatched(); - if (matched == 0) { - if (exclusion != null) { - if(preExCounter.getNumMatched() > 0) { - throw new AccessControlException("All results Excluded"); - } - } - throw new ResourceNotInArchiveException("the URL " + keyUrl - + " is not in the archive."); - } - - // now we need to set some filter properties on the results: - results.putFilter(WaybackConstants.REQUEST_URL, keyUrl); results.putFilter(WaybackConstants.REQUEST_TYPE, searchType); - results.putFilter(WaybackConstants.REQUEST_START_DATE, startDate); - results.putFilter(WaybackConstants.REQUEST_EXACT_DATE, exactDate); - results.putFilter(WaybackConstants.REQUEST_END_DATE, endDate); - - // window info - results.putFilter(WaybackConstants.RESULTS_FIRST_RETURNED, String - .valueOf(startResult)); - results.putFilter(WaybackConstants.RESULTS_REQUESTED, String - .valueOf(resultsPerPage)); - - // how many are actually in the results: - results.putFilter(WaybackConstants.RESULTS_NUM_RESULTS, String - .valueOf(matched)); - - // how many matched (includes those outside window) - results.putFilter(WaybackConstants.RESULTS_NUM_RETURNED, String - .valueOf(results.getResultCount())); - return results; } - public void addSearchResults(Iterator<SearchResult> itr) throws IOException, 
+ public void addSearchResults(Iterator<CaptureSearchResult> itr) throws IOException, UnsupportedOperationException { if(source instanceof UpdatableSearchResultSource) { UpdatableSearchResultSource updatable = @@ -495,11 +297,194 @@ source.shutdown(); } - public ObjectFilter<SearchResult> getAnnotater() { + public ObjectFilter<CaptureSearchResult> getAnnotater() { return annotater; } - public void setAnnotater(ObjectFilter<SearchResult> annotater) { + public void setAnnotater(ObjectFilter<CaptureSearchResult> annotater) { this.annotater = annotater; } + + private class CaptureQueryFilterState { + public final static int TYPE_REPLAY = 0; + public final static int TYPE_CAPTURE = 1; + public final static int TYPE_URL = 2; + + private ObjectFilterChain<CaptureSearchResult> filter = null; + private CounterFilter finalCounter = null; + private CounterFilter preExclusionCounter = null; + private String keyUrl = null; + private String startDate; + private String endDate; + private String exactDate; + + public CaptureQueryFilterState(WaybackRequest request, + UrlCanonicalizer canonicalizer, int type) + throws BadQueryException { + + String searchUrl = getRequired(request, + WaybackConstants.REQUEST_URL); + try { + keyUrl = canonicalizer.urlStringToKey(searchUrl); + } catch (URIException e) { + throw new BadQueryException("invalid " + + WaybackConstants.REQUEST_URL + " " + searchUrl); + } + + filter = new ObjectFilterChain<CaptureSearchResult>(); + startDate = getRequired(request, + WaybackConstants.REQUEST_START_DATE, + Timestamp.earliestTimestamp().getDateStr()); + endDate = getRequired(request, + WaybackConstants.REQUEST_END_DATE, + Timestamp.latestTimestamp().getDateStr()); + if(type == TYPE_REPLAY) { + exactDate = getRequired(request, + WaybackConstants.REQUEST_EXACT_DATE, Timestamp + .latestTimestamp().getDateStr()); + } + + + finalCounter = new CounterFilter(); + preExclusionCounter = new CounterFilter(); + DateRangeFilter drFilter = new 
DateRangeFilter(startDate,endDate); + + // has the user asked for only results on the exact host specified? + ObjectFilter<CaptureSearchResult> exactHost = + getExactHostFilter(request); + // checks an exclusion service for every matching record + ObjectFilter<CaptureSearchResult> exclusion = + request.getExclusionFilter(); + + + // makes sure we don't inspect too many records: prevents DOS + filter.addFilter(new GuardRailFilter(maxRecords)); + filter.addFilter(new DuplicateRecordFilter()); + + if(type == TYPE_REPLAY) { + filter.addFilter(new UrlMatchFilter(keyUrl)); + filter.addFilter(new EndDateFilter(endDate)); + SelfRedirectFilter selfRedirectFilter= new SelfRedirectFilter(); + selfRedirectFilter.setCanonicalizer(canonicalizer); + filter.addFilter(selfRedirectFilter); + } else if(type == TYPE_CAPTURE){ + filter.addFilter(new UrlMatchFilter(keyUrl)); + filter.addFilter(drFilter); + } else if(type == TYPE_URL) { + filter.addFilter(new UrlPrefixMatchFilter(keyUrl)); + } else { + throw new BadQueryException("Unknown type"); + } + + if(exactHost != null) { + filter.addFilter(exactHost); + } + + // count how many results got to the ExclusionFilter: + filter.addFilter(preExclusionCounter); + + if(exclusion != null) { + filter.addFilter(exclusion); + } + + // count how many results got past the ExclusionFilter, or how + // many total matched, if there was no ExclusionFilter: + filter.addFilter(finalCounter); + } + public String getKeyUrl() { + return keyUrl; + } + public ObjectFilter<CaptureSearchResult> getFilter() { + return filter; + } + public void annotateResults(SearchResults results) + throws AccessControlException, ResourceNotInArchiveException { + + int matched = finalCounter.getNumMatched(); + if (matched == 0) { + if (preExclusionCounter != null) { + if(preExclusionCounter.getNumMatched() > 0) { + throw new AccessControlException("All results Excluded"); + } + } + throw new ResourceNotInArchiveException("the URL " + keyUrl + + " is not in the archive."); + } 
+ // now we need to set some filter properties on the results: + results.putFilter(WaybackConstants.REQUEST_URL, keyUrl); + results.putFilter(WaybackConstants.REQUEST_START_DATE, startDate); + results.putFilter(WaybackConstants.REQUEST_END_DATE, endDate); + if(exactDate != null) { + results.putFilter(WaybackConstants.REQUEST_EXACT_DATE, exactDate); + } + } + } + private static HostMatchFilter getExactHostFilter(WaybackRequest r) { + + HostMatchFilter filter = null; + String exactHostFlag = r.get( + WaybackConstants.REQUEST_EXACT_HOST_ONLY); + if(exactHostFlag != null && + exactHostFlag.equals(WaybackConstants.REQUEST_YES)) { + + String searchUrl = r.get(WaybackConstants.REQUEST_URL); + try { + + UURI searchURI = UURIFactory.getInstance(searchUrl); + String exactHost = searchURI.getHost(); + filter = new HostMatchFilter(exactHost); + + } catch (URIException e) { + // Really, this isn't gonna happen, we've already canonicalized + // it... should really optimize and do that just once. + e.printStackTrace(); + } + } + return filter; + } + private class WindowFilterState<T> { + int startResult; // calculated based on hits/page * pagenum + int resultsPerPage; + int pageNum; + ObjectFilterChain<T> windowFilters; + WindowStartFilter<T> startFilter; + WindowEndFilter<T> endFilter; + public WindowFilterState(WaybackRequest request) + throws BadQueryException { + + windowFilters = new ObjectFilterChain<T>(); + // first grab all the info from the WaybackRequest, and validate it: + resultsPerPage = request.getResultsPerPage(); + pageNum = request.getPageNum(); + + if (resultsPerPage < 1) { + throw new BadQueryException("resultsPerPage cannot be < 1"); + } + if (resultsPerPage > maxRecords) { + throw new BadQueryException("resultsPerPage cannot be > " + + maxRecords); + } + if (pageNum < 1) { + throw new BadQueryException("pageNum must be > 0"); + } + startResult = (pageNum - 1) * resultsPerPage; + startFilter = new WindowStartFilter<T>(startResult); + endFilter = new 
WindowEndFilter<T>(resultsPerPage); + windowFilters.addFilter(startFilter); + windowFilters.addFilter(endFilter); + } + public ObjectFilter<T> getFilter() { + return windowFilters; + } + public void annotateResults(SearchResults results) { + results.setFirstReturned(startResult); + results.setReturnedCount(resultsPerPage); + + // how many went by the filters: + results.setMatchingCount(startFilter.getNumSeen()); + + // how many were actually returned: + results.setReturnedCount(endFilter.getNumReturned()); + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:48:38
|
Revision: 2377 http://archive-access.svn.sourceforge.net/archive-access/?rev=2377&view=rev Author: bradtofel Date: 2008-07-01 16:48:47 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult required non-trivial changes, but that was the only tangible result. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2008-07-01 23:47:50 UTC (rev 2376) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) @@ -36,8 +36,8 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; @@ -77,7 +77,7 @@ private static final String NUTCH_DIGEST = "digest"; private static final String NUTCH_PRIMARY_TYPE = "primaryType"; private static final String NUTCH_SUB_TYPE = "subType"; - private static final String NUTCH_CAPTURE_HOST = "site"; +// private static final String NUTCH_CAPTURE_HOST = "site"; private static final String NUTCH_CAPTURE_URL = "link"; private static final String NUTCH_SEARCH_RESULT_TAG = "item"; @@ -129,7 +129,7 @@ e.getMessage()); } - SearchResults results; 
+ CaptureSearchResults results; String type = wbRequest.get(WaybackConstants.REQUEST_TYPE); if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || type.equals(WaybackConstants.REQUEST_URL_QUERY)) { @@ -157,21 +157,21 @@ Element e = (Element) nodes.item(i); - SearchResult result = elementToSearchResult(e); + CaptureSearchResult result = elementToSearchResult(e); results.addSearchResult(result); } Element channelElement = (Element) channel.item(0); - results.putFilter(WaybackConstants.RESULTS_FIRST_RETURNED, + results.putFilter(SearchResults.RESULTS_FIRST_RETURNED, getNodeContent(channelElement,NUTCH_FIRST_RESULT)); - results.putFilter(WaybackConstants.RESULTS_NUM_RESULTS, + results.putFilter(SearchResults.RESULTS_NUM_RESULTS, getNodeContent(channelElement,NUTCH_NUM_RESULTS)); - results.putFilter(WaybackConstants.RESULTS_NUM_RETURNED, + results.putFilter(SearchResults.RESULTS_NUM_RETURNED, getNodeContent(channelElement,NUTCH_NUM_RETURNED)); - results.putFilter(WaybackConstants.RESULTS_REQUESTED, + results.putFilter(SearchResults.RESULTS_REQUESTED, String.valueOf(wbRequest.getResultsPerPage())); results.putFilter(WaybackConstants.REQUEST_START_DATE, @@ -182,13 +182,12 @@ return results; } - private SearchResult elementToSearchResult(Element e) + private CaptureSearchResult elementToSearchResult(Element e) throws ResourceIndexNotAvailableException { - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); - result.put(WaybackConstants.RESULT_ARC_FILE, - getNodeNutchContent(e,NUTCH_ARCNAME)); + result.setFile(getNodeNutchContent(e,NUTCH_ARCNAME)); // The date in nutchwax is now named 'tstamp' and its // 17 characters rather than 14. Pass first 14 only. 
@@ -202,27 +201,21 @@ if (d.length() == 17) { d = d.substring(0, 14); } - result.put(WaybackConstants.RESULT_CAPTURE_DATE, d); + result.setCaptureTimestamp(d); //result.put(WaybackConstants.RESULT_HTTP_CODE,getNodeContent(e,"")); - result.put(WaybackConstants.RESULT_HTTP_CODE,NUTCH_DEFAULT_HTTP_CODE); - result.put(WaybackConstants.RESULT_MD5_DIGEST, - getNodeNutchContent(e,NUTCH_DIGEST)); + result.setHttpCode(NUTCH_DEFAULT_HTTP_CODE); + result.setDigest(getNodeNutchContent(e,NUTCH_DIGEST)); - result.put(WaybackConstants.RESULT_MIME_TYPE, - getNodeNutchContent(e,NUTCH_PRIMARY_TYPE) + "/" + + result.setMimeType(getNodeNutchContent(e,NUTCH_PRIMARY_TYPE) + "/" + getNodeNutchContent(e,NUTCH_SUB_TYPE)); - result.put(WaybackConstants.RESULT_OFFSET, - getNodeNutchContent(e,NUTCH_ARCOFFSET)); + result.setOffset(Long.parseLong(getNodeNutchContent(e,NUTCH_ARCOFFSET))); - result.put(WaybackConstants.RESULT_ORIG_HOST, - getNodeNutchContent(e,NUTCH_CAPTURE_HOST)); -// result.put(WaybackConstants.RESULT_REDIRECT_URL,getNodeContent(e,"")); - result.put(WaybackConstants.RESULT_REDIRECT_URL, - NUTCH_DEFAULT_REDIRECT_URL); - result.put(WaybackConstants.RESULT_URL,getNodeContent(e, - NUTCH_CAPTURE_URL)); + result.setRedirectUrl(NUTCH_DEFAULT_REDIRECT_URL); + result.setCaptureTimestamp(getNodeContent(e,NUTCH_CAPTURE_URL)); + result.setOriginalUrl(getNodeContent(e,NUTCH_CAPTURE_URL)); + result.setUrlKey(getNodeContent(e,NUTCH_CAPTURE_URL)); return result; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-07-01 23:47:50 UTC (rev 2376) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) @@ -35,9 
+35,11 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UrlSearchResult; import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; @@ -122,13 +124,13 @@ throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { - +// throw new ResourceIndexNotAvailableException("oops"); return urlToSearchResults(getRequestUrl(wbRequest), getSearchResultFilters(wbRequest)); } protected SearchResults urlToSearchResults(String requestUrl, - ObjectFilter<SearchResult> filter) + ObjectFilter<CaptureSearchResult> filter) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { @@ -190,11 +192,11 @@ } } - protected ObjectFilter<SearchResult> getSearchResultFilters( + protected ObjectFilter<CaptureSearchResult> getSearchResultFilters( WaybackRequest wbRequest) { String searchType = wbRequest.get(WaybackConstants.REQUEST_TYPE); - ObjectFilterChain<SearchResult> filters = - new ObjectFilterChain<SearchResult>(); + ObjectFilterChain<CaptureSearchResult> filters = + new ObjectFilterChain<CaptureSearchResult>(); if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { @@ -210,14 +212,14 @@ } protected SearchResults documentToSearchResults(Document document, - ObjectFilter<SearchResult> filter) { + ObjectFilter<CaptureSearchResult> filter) { SearchResults results = null; NodeList filters = getRequestFilters(document); String resultsType = getResultsType(document); 
if(resultsType.equals(WaybackConstants.RESULTS_TYPE_CAPTURE)) { - results = new CaptureSearchResults(); + results = documentToCaptureSearchResults(document,filter); } else { - results = new UrlSearchResults(); + results = documentToUrlSearchResults(document); } for(int i = 0; i < filters.getLength(); i++) { String key = filters.item(i).getNodeName(); @@ -226,11 +228,26 @@ results.putFilter(key,value); } } - + return results; + } + private UrlSearchResults documentToUrlSearchResults( + Document document) { + UrlSearchResults results = new UrlSearchResults(); NodeList xresults = getSearchResults(document); for(int i = 0; i < xresults.getLength(); i++) { Node xresult = xresults.item(i); - SearchResult result = searchElementToSearchResult(xresult); + UrlSearchResult result = searchElementToUrlSearchResult(xresult); + results.addSearchResult(result, true); + } + return results; + } + private CaptureSearchResults documentToCaptureSearchResults( + Document document, ObjectFilter<CaptureSearchResult> filter) { + CaptureSearchResults results = new CaptureSearchResults(); + NodeList xresults = getSearchResults(document); + for(int i = 0; i < xresults.getLength(); i++) { + Node xresult = xresults.item(i); + CaptureSearchResult result = searchElementToCaptureSearchResult(xresult); int ruling = ObjectFilter.FILTER_INCLUDE; if (filter != null) { @@ -245,11 +262,21 @@ } return results; } + private UrlSearchResult searchElementToUrlSearchResult(Node e) { - private SearchResult searchElementToSearchResult(Node e) { + UrlSearchResult result = new UrlSearchResult(); + addNodeDataToSearchResult(e,result); + return result; + } + private CaptureSearchResult searchElementToCaptureSearchResult(Node e) { - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); + addNodeDataToSearchResult(e,result); + return result; + } + private void addNodeDataToSearchResult(Node e, SearchResult result) { + NodeList chitlens = e.getChildNodes(); for(int i = 0; 
i < chitlens.getLength(); i++) { String key = chitlens.item(i).getNodeName(); @@ -258,7 +285,6 @@ result.put(key,value); } } - return result; } protected NodeList getRequestFilters(Document d) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:47:42
|
Revision: 2376 http://archive-access.svn.sourceforge.net/archive-access/?rev=2376&view=rev Author: bradtofel Date: 2008-07-01 16:47:50 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java 2008-07-01 23:46:33 UTC (rev 2375) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java 2008-07-01 23:47:50 UTC (rev 2376) @@ -7,7 +7,7 @@ import java.util.Iterator; import java.util.logging.Logger; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.updater.IndexClient; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.DirMaker; @@ -151,7 +151,7 @@ try { LOGGER.info("Indexing " + file.getAbsolutePath()); - CloseableIterator<SearchResult> itr = store.indexFile(file); + CloseableIterator<CaptureSearchResult> itr = store.indexFile(file); if(indexClient.addSearchResults(cdxBase, itr)) { if (!workFlagFile.delete()) { Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java 2008-07-01 23:46:33 UTC (rev 2375) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java 2008-07-01 23:47:50 UTC (rev 2376) @@ -28,9 +28,8 @@ import java.net.URL; import org.archive.wayback.ResourceStore; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.resourcestore.resourcefile.ArcWarcFilenameFilter; import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; @@ -50,21 +49,16 @@ private String urlPrefix = null; - public Resource retrieveResource(SearchResult result) throws IOException, + public Resource retrieveResource(CaptureSearchResult result) throws IOException, ResourceNotAvailableException { // extract ARC filename - String fileName = result.get(WaybackConstants.RESULT_ARC_FILE); + String fileName = result.getFile(); if(fileName == null || fileName.length() < 1) { throw new IOException("No ARC/WARC name in search result..."); } - // extract offset + convert to long - final String offsetString = result.get(WaybackConstants.RESULT_OFFSET); - if(offsetString == null || offsetString.length() < 1) { - throw new IOException("No ARC/WARC offset in search result..."); - } - final long offset = Long.parseLong(offsetString); + final long offset = result.getOffset(); if(!fileName.endsWith(ArcWarcFilenameFilter.ARC_SUFFIX) && !fileName.endsWith(ArcWarcFilenameFilter.ARC_GZ_SUFFIX) && !fileName.endsWith(ArcWarcFilenameFilter.WARC_SUFFIX) 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java 2008-07-01 23:46:33 UTC (rev 2375) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java 2008-07-01 23:47:50 UTC (rev 2376) @@ -29,9 +29,8 @@ import java.net.URL; import org.archive.wayback.ResourceStore; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; @@ -50,26 +49,21 @@ /* (non-Javadoc) * @see org.archive.wayback.ResourceStore#retrieveResource(org.archive.wayback.core.SearchResult) */ - public Resource retrieveResource(SearchResult result) throws IOException, + public Resource retrieveResource(CaptureSearchResult result) throws IOException, ResourceNotAvailableException { // extract ARC filename - String fileName = result.get(WaybackConstants.RESULT_ARC_FILE); + String fileName = result.getFile(); if(fileName == null || fileName.length() < 1) { throw new IOException("No ARC/WARC name in search result..."); } - // extract offset + convert to long - final String offsetString = result.get(WaybackConstants.RESULT_OFFSET); - if(offsetString == null || offsetString.length() < 1) { - throw new IOException("No ARC/WARC offset in search result..."); - } String urls[] = db.nameToUrls(fileName); if(urls == null || urls.length == 0) { throw new ResourceNotAvailableException("Unable to locate(" + 
fileName + ")"); } - final long offset = Long.parseLong(offsetString); + final long offset = result.getOffset(); Resource r = null; // TODO: attempt multiple threads? Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java 2008-07-01 23:46:33 UTC (rev 2375) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java 2008-07-01 23:47:50 UTC (rev 2376) @@ -8,9 +8,8 @@ import java.util.List; import org.archive.wayback.ResourceStore; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.resourcestore.indexer.ArcIndexer; @@ -49,13 +48,6 @@ indexThread.start(); } } - protected String resultToFileName(SearchResult result) { - return result.get(WaybackConstants.RESULT_ARC_FILE); - } - - protected long resultToOffset(SearchResult result) { - return Long.parseLong(result.get(WaybackConstants.RESULT_OFFSET)); - } public File getLocalFile(String fileName) { // try adding suffixes: empty string is first in the list @@ -75,10 +67,10 @@ return null; } - public Resource retrieveResource(SearchResult result) throws IOException, + public Resource retrieveResource(CaptureSearchResult result) throws IOException, ResourceNotAvailableException { - String fileName = resultToFileName(result); - long offset = resultToOffset(result); + String fileName = result.getFile(); + long offset = result.getOffset(); File file = getLocalFile(fileName); if (file == null) { @@ -92,8 
+84,8 @@ } } - public CloseableIterator<SearchResult> indexFile(File dataFile) throws IOException { - CloseableIterator<SearchResult> itr = null; + public CloseableIterator<CaptureSearchResult> indexFile(File dataFile) throws IOException { + CloseableIterator<CaptureSearchResult> itr = null; String name = dataFile.getName(); if(name.endsWith(ARC_EXTENSION)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2375 http://archive-access.svn.sourceforge.net/archive-access/?rev=2375&view=rev Author: bradtofel Date: 2008-07-01 16:46:33 -0700 (Tue, 01 Jul 2008) Log Message: ----------- INITIAL REV: Iterator implementation that uses an ObjectFilter to determine if entries from the internal delegate are included. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ObjectFilterIterator.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ObjectFilterIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ObjectFilterIterator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ObjectFilterIterator.java 2008-07-01 23:46:33 UTC (rev 2375) @@ -0,0 +1,109 @@ +/* ObjectFilterIterator + * + * $Id$ + * + * Created on 2:55:48 PM Jun 28, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.util; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ObjectFilterIterator<T> implements CloseableIterator<T> { + + ObjectFilter<T> filter = null; + Iterator<T> itr = null; + boolean aborted = false; + T cachedNext = null; + public ObjectFilterIterator(CloseableIterator<T> itr, + ObjectFilter<T> filter) { + this.itr = itr; + this.filter = filter; + } + + /* (non-Javadoc) + * @see java.util.Iterator#hasNext() + */ + public boolean hasNext() { + if(cachedNext != null) { + return true; + } + if(aborted) { + return false; + } + while(cachedNext == null) { + if(!itr.hasNext()) { + aborted = true; + close(); + return false; + } + T maybeNext = itr.next(); + int ruling = filter.filterObject(maybeNext); + if(ruling == ObjectFilter.FILTER_ABORT) { + aborted = true; + close(); + return false; + } else if(ruling == ObjectFilter.FILTER_INCLUDE) { + cachedNext = maybeNext; + } + } + return true; + } + + public void close() { + if(itr instanceof CloseableIterator) { + CloseableIterator<T> citr = + (CloseableIterator<T>) itr; + try { + citr.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + /* (non-Javadoc) + * @see java.util.Iterator#next() + */ + public T next() { + if(cachedNext == null) { + throw new NoSuchElementException("call hasNext first!"); + } + T o = cachedNext; + cachedNext = null; + return o; + } + + /* (non-Javadoc) + * @see java.util.Iterator#remove() + */ + public void remove() { + // TODO Auto-generated method stub + + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:44:57
|
Revision: 2374 http://archive-access.svn.sourceforge.net/archive-access/?rev=2374&view=rev Author: bradtofel Date: 2008-07-01 16:45:04 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult this includes use of accessor methods FEATURE: IdentityUrlCanonicalizer is now default! Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ArcIndexer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WarcIndexer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java 2008-07-01 23:44:18 UTC (rev 2373) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ARCRecordToSearchResultAdapter.java 2008-07-01 23:45:04 UTC (rev 2374) @@ -26,19 +26,17 @@ import java.io.File; import java.io.IOException; -import java.util.logging.Logger; +//import java.util.logging.Logger; import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.URIException; import org.archive.io.arc.ARCRecord; import org.archive.io.arc.ARCRecordMetaData; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; 
+import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; -import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.util.url.IdentityUrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * @@ -47,22 +45,22 @@ * @version $Date$, $Revision$ */ public class ARCRecordToSearchResultAdapter -implements Adapter<ARCRecord,SearchResult>{ +implements Adapter<ARCRecord,CaptureSearchResult>{ - private static final Logger LOGGER = Logger.getLogger( - ARCRecordToSearchResultAdapter.class.getName()); +// private static final Logger LOGGER = Logger.getLogger( +// ARCRecordToSearchResultAdapter.class.getName()); private UrlCanonicalizer canonicalizer = null; public ARCRecordToSearchResultAdapter() { - canonicalizer = new AggressiveUrlCanonicalizer(); + canonicalizer = new IdentityUrlCanonicalizer(); } // public static SearchResult arcRecordToSearchResult(final ARCRecord rec) // throws IOException, ParseException { /* (non-Javadoc) * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public SearchResult adapt(ARCRecord rec) { + public CaptureSearchResult adapt(ARCRecord rec) { try { return adaptInner(rec); } catch (IOException e) { @@ -71,26 +69,25 @@ } } - private SearchResult adaptInner(ARCRecord rec) throws IOException { + private CaptureSearchResult adaptInner(ARCRecord rec) throws IOException { rec.close(); ARCRecordMetaData meta = rec.getMetaData(); - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); String arcName = meta.getArc(); int index = arcName.lastIndexOf(File.separator); if (index > 0 && (index + 1) < arcName.length()) { arcName = arcName.substring(index + 1); } - result.put(WaybackConstants.RESULT_ARC_FILE, arcName); - result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(meta - .getOffset())); + result.setFile(arcName); + result.setOffset(meta.getOffset()); // initialize with default HTTP code... 
- result.put(WaybackConstants.RESULT_HTTP_CODE, "-"); + result.setHttpCode("-"); - result.put(WaybackConstants.RESULT_MD5_DIGEST, rec.getDigestStr()); - result.put(WaybackConstants.RESULT_MIME_TYPE, meta.getMimetype()); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, meta.getDate()); + result.setDigest(rec.getDigestStr()); + result.setMimeType(meta.getMimetype()); + result.setCaptureTimestamp(meta.getDate()); String uriStr = meta.getUrl(); if (uriStr.startsWith(ARCRecord.ARC_MAGIC_NUMBER)) { @@ -100,67 +97,49 @@ if (uriStr.startsWith(WaybackConstants.DNS_URL_PREFIX)) { // skip URL + HTTP header processing for dns records... - String origHost = uriStr.substring(WaybackConstants.DNS_URL_PREFIX - .length()); - result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); - result.put(WaybackConstants.RESULT_REDIRECT_URL, "-"); - result.put(WaybackConstants.RESULT_URL, uriStr); - result.put(WaybackConstants.RESULT_URL_KEY, uriStr); + result.setOriginalUrl(uriStr); + result.setRedirectUrl("-"); + result.setUrlKey(uriStr); } else { - UURI uri = UURIFactory.getInstance(uriStr); - result.put(WaybackConstants.RESULT_URL, uriStr); + result.setOriginalUrl(uriStr); - String uriHost = uri.getHost(); - if (uriHost == null) { - LOGGER.info("No host in " + uriStr + " in " + meta.getArc()); - } else { - result.put(WaybackConstants.RESULT_ORIG_HOST, uriHost); - String statusCode = (meta.getStatusCode() == null) ? "-" : meta - .getStatusCode(); - result.put(WaybackConstants.RESULT_HTTP_CODE, statusCode); - - String redirectUrl = "-"; - Header[] headers = rec.getHttpHeaders(); - if (headers != null) { - - for (int i = 0; i < headers.length; i++) { - if (headers[i].getName().equals( - WaybackConstants.LOCATION_HTTP_HEADER)) { + String statusCode = (meta.getStatusCode() == null) ? 
"-" : meta + .getStatusCode(); + result.setHttpCode(statusCode); + + String redirectUrl = "-"; + Header[] headers = rec.getHttpHeaders(); + if (headers != null) { + + for (int i = 0; i < headers.length; i++) { + if (headers[i].getName().equals( + WaybackConstants.LOCATION_HTTP_HEADER)) { - String locationStr = headers[i].getValue(); - // TODO: "Location" is supposed to be absolute: - // (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) - // (section 14.30) but Content-Location can be - // relative. - // is it correct to resolve a relative Location, as - // we are? - // it's also possible to have both in the HTTP - // headers... - // should we prefer one over the other? - // right now, we're ignoring "Content-Location" - try { - UURI uriRedirect = UURIFactory.getInstance(uri, - locationStr); - redirectUrl = uriRedirect.getEscapedURI(); - - } catch (URIException e) { - LOGGER.info("Bad Location: " + locationStr - + " for " + uriStr + " in " - + meta.getArc() + " Skipped"); - } - break; - } + String locationStr = headers[i].getValue(); + // TODO: "Location" is supposed to be absolute: + // (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) + // (section 14.30) but Content-Location can be + // relative. + // is it correct to resolve a relative Location, as + // we are? + // it's also possible to have both in the HTTP + // headers... + // should we prefer one over the other? 
+ // right now, we're ignoring "Content-Location" + redirectUrl = UrlOperations.resolveUrl(uriStr, + locationStr); + + break; } } - result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl); + result.setRedirectUrl(redirectUrl); - String indexUrl = canonicalizer.urlStringToKey(meta.getUrl()); - result.put(WaybackConstants.RESULT_URL_KEY, indexUrl); + String urlKey = canonicalizer.urlStringToKey(meta.getUrl()); + result.setUrlKey(urlKey); } - } return result; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ArcIndexer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ArcIndexer.java 2008-07-01 23:44:18 UTC (rev 2373) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/ArcIndexer.java 2008-07-01 23:45:04 UTC (rev 2374) @@ -34,7 +34,7 @@ import org.archive.io.arc.ARCReaderFactory; import org.archive.io.arc.ARCRecord; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; @@ -43,7 +43,7 @@ import org.archive.wayback.util.url.IdentityUrlCanonicalizer; /** - * Transforms an ARC file into Iterator<SearchResult>. + * Transforms an ARC file into Iterator<CaptureSearchResult>. 
* * @author brad * @version $Date$, $Revision$ @@ -65,7 +65,7 @@ * @return Iterator of SearchResults for input arc File * @throws IOException */ - public CloseableIterator<SearchResult> iterator(File arc) + public CloseableIterator<CaptureSearchResult> iterator(File arc) throws IOException { return iterator(ARCReaderFactory.get(arc)); } @@ -75,7 +75,7 @@ * @return Iterator of SearchResults for input pathOrUrl * @throws IOException */ - public CloseableIterator<SearchResult> iterator(String pathOrUrl) + public CloseableIterator<CaptureSearchResult> iterator(String pathOrUrl) throws IOException { return iterator(ARCReaderFactory.get(pathOrUrl)); } @@ -85,7 +85,7 @@ * @return Iterator of SearchResults for input ARCReader * @throws IOException */ - public CloseableIterator<SearchResult> iterator(ARCReader arcReader) + public CloseableIterator<CaptureSearchResult> iterator(ARCReader arcReader) throws IOException { arcReader.setParseHttpHeaders(true); @@ -102,7 +102,7 @@ CloseableIterator<ARCRecord> itr2 = new AdaptedIterator<ArchiveRecord,ARCRecord>(itr1,adapter1); - return new AdaptedIterator<ARCRecord,SearchResult>(itr2,adapter2); + return new AdaptedIterator<ARCRecord,CaptureSearchResult>(itr2,adapter2); } public UrlCanonicalizer getCanonicalizer() { @@ -146,7 +146,7 @@ } else { USAGE(); } - Iterator<SearchResult> res = indexer.iterator(arc); + Iterator<CaptureSearchResult> res = indexer.iterator(arc); Iterator<String> lines = SearchResultToCDXLineAdapter.adapt(res); while(lines.hasNext()) { pw.println(lines.next()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2008-07-01 23:44:18 UTC (rev 2373) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WARCRecordToSearchResultAdapter.java 2008-07-01 23:45:04 UTC (rev 2374) @@ -2,25 +2,23 @@ import java.io.File; import java.io.IOException; -import java.util.logging.Logger; +//import java.util.logging.Logger; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HttpParser; import org.apache.commons.httpclient.StatusLine; -import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.util.EncodingUtil; import org.archive.io.ArchiveRecordHeader; import org.archive.io.RecoverableIOException; import org.archive.io.arc.ARCConstants; import org.archive.io.warc.WARCConstants; import org.archive.io.warc.WARCRecord; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * Adapts certain WARCRecords into SearchResults. 
DNS and response records are @@ -34,24 +32,12 @@ * @version $Date$, $Revision$ */ public class WARCRecordToSearchResultAdapter -implements Adapter<WARCRecord,SearchResult>{ +implements Adapter<WARCRecord,CaptureSearchResult>{ private final static String DEFAULT_VALUE = "-"; - private final static String SEARCH_FIELDS[] = { - WaybackConstants.RESULT_URL, - WaybackConstants.RESULT_URL_KEY, - WaybackConstants.RESULT_ORIG_HOST, - WaybackConstants.RESULT_CAPTURE_DATE, - WaybackConstants.RESULT_MD5_DIGEST, - WaybackConstants.RESULT_MIME_TYPE, - WaybackConstants.RESULT_HTTP_CODE, - WaybackConstants.RESULT_REDIRECT_URL, - WaybackConstants.RESULT_ARC_FILE, - WaybackConstants.RESULT_OFFSET, - }; - private static final Logger LOGGER = Logger.getLogger( - WARCRecordToSearchResultAdapter.class.getName()); +// private static final Logger LOGGER = Logger.getLogger( +// WARCRecordToSearchResultAdapter.class.getName()); private UrlCanonicalizer canonicalizer = null; @@ -62,7 +48,7 @@ /* (non-Javadoc) * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public SearchResult adapt(WARCRecord rec) { + public CaptureSearchResult adapt(WARCRecord rec) { try { return adaptInner(rec); } catch (IOException e) { @@ -117,75 +103,58 @@ return orig; } - private SearchResult getBlankSearchResult() { - SearchResult result = new SearchResult(); - for(String field : SEARCH_FIELDS) { - result.put(field, DEFAULT_VALUE); - } + private CaptureSearchResult getBlankSearchResult() { + CaptureSearchResult result = new CaptureSearchResult(); + + result.setUrlKey(DEFAULT_VALUE); + result.setOriginalUrl(DEFAULT_VALUE); + result.setCaptureTimestamp(DEFAULT_VALUE); + result.setDigest(DEFAULT_VALUE); + result.setMimeType(DEFAULT_VALUE); + result.setHttpCode(DEFAULT_VALUE); + result.setRedirectUrl(DEFAULT_VALUE); + result.setFile(DEFAULT_VALUE); + result.setOffset(0); return result; } - private UURI addUrlDataToSearchResult(SearchResult result, String urlStr) + private void 
addUrlDataToSearchResult(CaptureSearchResult result, String urlStr) throws IOException { - result.put(WaybackConstants.RESULT_URL, urlStr); - result.put(WaybackConstants.RESULT_URL_KEY, urlStr); - - - UURI uri = UURIFactory.getInstance(urlStr); - String uriHost = uri.getHost(); - if (uriHost == null) { - - LOGGER.info("No host in " + urlStr); - - } else { - - result.put(WaybackConstants.RESULT_ORIG_HOST, uriHost); - } - + result.setOriginalUrl(urlStr); String urlKey = canonicalizer.urlStringToKey(urlStr); - result.put(WaybackConstants.RESULT_URL_KEY, urlKey); - - return uri; + result.setUrlKey(urlKey); } - private SearchResult adaptDNS(ArchiveRecordHeader header, WARCRecord rec) + private CaptureSearchResult adaptDNS(ArchiveRecordHeader header, WARCRecord rec) throws IOException { - SearchResult result = getBlankSearchResult(); + CaptureSearchResult result = getBlankSearchResult(); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, - transformDate(header.getDate())); - result.put(WaybackConstants.RESULT_ARC_FILE, - transformWarcFilename(header.getReaderIdentifier())); - result.put(WaybackConstants.RESULT_OFFSET, - String.valueOf(header.getOffset())); + result.setCaptureTimestamp(transformDate(header.getDate())); + result.setFile(transformWarcFilename(header.getReaderIdentifier())); + result.setOffset(header.getOffset()); String uriStr = header.getUrl(); - String origHost = uriStr.substring(WaybackConstants.DNS_URL_PREFIX - .length()); - result.put(WaybackConstants.RESULT_MIME_TYPE, header.getMimetype()); + result.setMimeType(header.getMimetype()); - result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); - result.put(WaybackConstants.RESULT_URL, uriStr); - result.put(WaybackConstants.RESULT_URL_KEY, uriStr); + result.setOriginalUrl(uriStr); + result.setUrlKey(uriStr); rec.close(); - result.put(WaybackConstants.RESULT_MD5_DIGEST, rec.getDigestStr()); + result.setDigest(rec.getDigestStr()); return result; } - private SearchResult adaptRevisit(ArchiveRecordHeader 
header, WARCRecord rec) + private CaptureSearchResult adaptRevisit(ArchiveRecordHeader header, WARCRecord rec) throws IOException { - SearchResult result = getBlankSearchResult(); + CaptureSearchResult result = getBlankSearchResult(); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, - transformDate(header.getDate())); - result.put(WaybackConstants.RESULT_MD5_DIGEST, - transformDigest(header.getHeaderValue( + result.setCaptureTimestamp(transformDate(header.getDate())); + result.setDigest(transformDigest(header.getHeaderValue( WARCRecord.HEADER_KEY_PAYLOAD_DIGEST))); addUrlDataToSearchResult(result,header.getUrl()); @@ -211,20 +180,17 @@ return count; } - private SearchResult adaptResponse(ArchiveRecordHeader header, WARCRecord rec) + private CaptureSearchResult adaptResponse(ArchiveRecordHeader header, WARCRecord rec) throws IOException { - SearchResult result = getBlankSearchResult(); + CaptureSearchResult result = getBlankSearchResult(); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, - transformDate(header.getDate())); - result.put(WaybackConstants.RESULT_ARC_FILE, - transformWarcFilename(header.getReaderIdentifier())); - result.put(WaybackConstants.RESULT_OFFSET, - String.valueOf(header.getOffset())); + result.setCaptureTimestamp(transformDate(header.getDate())); + result.setFile(transformWarcFilename(header.getReaderIdentifier())); + result.setOffset(header.getOffset()); String origUrl = header.getUrl(); - UURI uri = addUrlDataToSearchResult(result,origUrl); + addUrlDataToSearchResult(result,origUrl); // need to parse the documents HTTP message and headers here: WARCReader // does not implement this... yet.. 
@@ -242,15 +208,13 @@ throw new RecoverableIOException("Failed parse of http status line."); } StatusLine status = new StatusLine(statusLine); - result.put(WaybackConstants.RESULT_HTTP_CODE, - String.valueOf(status.getStatusCode())); + result.setHttpCode(String.valueOf(status.getStatusCode())); Header[] headers = HttpParser.parseHeaders(rec, ARCConstants.DEFAULT_ENCODING); rec.close(); - result.put(WaybackConstants.RESULT_MD5_DIGEST, - transformDigest(header.getHeaderValue( + result.setDigest(transformDigest(header.getHeaderValue( WARCRecord.HEADER_KEY_PAYLOAD_DIGEST))); if (headers != null) { @@ -270,28 +234,19 @@ // headers... // should we prefer one over the other? // right now, we're ignoring "Content-Location" - try { - UURI uriRedirect = UURIFactory.getInstance(uri, - locationStr); - result.put(WaybackConstants.RESULT_REDIRECT_URL, - uriRedirect.getEscapedURI()); - } catch (URIException e) { - LOGGER.info("Bad Location: " + locationStr - + " for " + origUrl + " in " - + header.getReaderIdentifier() + " Skipped"); - } + result.setRedirectUrl( + UrlOperations.resolveUrl(origUrl, locationStr)); } else if(httpHeader.getName().toLowerCase().equals("content-type")) { - result.put(WaybackConstants.RESULT_MIME_TYPE, - transformHTTPMime(httpHeader.getValue())); + result.setMimeType(transformHTTPMime(httpHeader.getValue())); } } } return result; } - private SearchResult adaptInner(WARCRecord rec) throws IOException { + private CaptureSearchResult adaptInner(WARCRecord rec) throws IOException { - SearchResult result = null; + CaptureSearchResult result = null; ArchiveRecordHeader header = rec.getHeader(); String type = header.getHeaderValue(WARCConstants.HEADER_KEY_TYPE).toString(); if(type.equals(WARCConstants.RESPONSE)) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WarcIndexer.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WarcIndexer.java 2008-07-01 23:44:18 UTC (rev 2373) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/WarcIndexer.java 2008-07-01 23:45:04 UTC (rev 2374) @@ -10,7 +10,7 @@ import org.archive.io.warc.WARCReaderFactory; import org.archive.io.warc.WARCRecord; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; @@ -35,7 +35,7 @@ * @return Iterator of SearchResults for input arc File * @throws IOException */ - public CloseableIterator<SearchResult> iterator(File warc) + public CloseableIterator<CaptureSearchResult> iterator(File warc) throws IOException { return iterator(WARCReaderFactory.get(warc)); } @@ -44,7 +44,7 @@ * @return Iterator of SearchResults for input pathOrUrl * @throws IOException */ - public CloseableIterator<SearchResult> iterator(String pathOrUrl) + public CloseableIterator<CaptureSearchResult> iterator(String pathOrUrl) throws IOException { return iterator(WARCReaderFactory.get(pathOrUrl)); } @@ -53,7 +53,7 @@ * @return Iterator of SearchResults for input arc File * @throws IOException */ - public CloseableIterator<SearchResult> iterator(WARCReader reader) + public CloseableIterator<CaptureSearchResult> iterator(WARCReader reader) throws IOException { Adapter<ArchiveRecord, WARCRecord> adapter1 = new ArchiveRecordToWARCRecordAdapter(); @@ -68,7 +68,7 @@ CloseableIterator<WARCRecord> itr2 = new AdaptedIterator<ArchiveRecord, WARCRecord>(itr1, adapter1); - return new AdaptedIterator<WARCRecord, SearchResult>(itr2, adapter2); + return new AdaptedIterator<WARCRecord, CaptureSearchResult>(itr2, adapter2); } public UrlCanonicalizer 
getCanonicalizer() { @@ -112,7 +112,7 @@ } else { USAGE(); } - Iterator<SearchResult> res = indexer.iterator(arc); + Iterator<CaptureSearchResult> res = indexer.iterator(arc); Iterator<String> lines = SearchResultToCDXLineAdapter.adapt(res); while (lines.hasNext()) { pw.println(lines.next()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2373 http://archive-access.svn.sourceforge.net/archive-access/?rev=2373&view=rev Author: bradtofel Date: 2008-07-01 16:44:18 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java 2008-07-01 23:43:08 UTC (rev 2372) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexWorker.java 2008-07-01 23:44:18 UTC (rev 2373) @@ -28,7 +28,7 @@ import java.util.logging.Logger; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.updater.IndexClient; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import org.archive.wayback.util.CloseableIterator; @@ -102,7 +102,7 @@ try { if(pathsOrUrls != null) { for(String pathOrUrl : pathsOrUrls) { - CloseableIterator<SearchResult> itr = indexFile(pathOrUrl); + CloseableIterator<CaptureSearchResult> itr = indexFile(pathOrUrl); target.addSearchResults(name, itr); itr.close(); break; @@ -115,10 +115,10 @@ return worked; } - public CloseableIterator<SearchResult> indexFile(String pathOrUrl) + public CloseableIterator<CaptureSearchResult> indexFile(String pathOrUrl) throws IOException { - CloseableIterator<SearchResult> itr = null; + CloseableIterator<CaptureSearchResult> itr = null; if(pathOrUrl.endsWith(ARC_EXTENSION)) { itr = arcIndexer.iterator(pathOrUrl); This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2372 http://archive-access.svn.sourceforge.net/archive-access/?rev=2372&view=rev Author: bradtofel Date: 2008-07-01 16:43:08 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult moved exception .jsps into /exception/ directory under webapp REFACTOR: internal method to determine which .jsp to use for exceptions, allowing simpler overriding of the ExceptionRenderer class. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2008-07-01 23:41:30 UTC (rev 2371) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2008-07-01 23:43:08 UTC (rev 2372) @@ -52,18 +52,18 @@ * @version $Date$, $Revision$ */ public class BaseExceptionRenderer implements ExceptionRenderer { - private String xmlErrorJsp = "/jsp/XMLError.jsp"; - private String errorJsp = "/jsp/HTMLError.jsp"; - private String imageErrorJsp = "/jsp/HTMLError.jsp"; - private String javascriptErrorJsp = "/jsp/JavaScriptError.jsp"; - private String cssErrorJsp = "/jsp/CSSError.jsp"; + private String xmlErrorJsp = "/exception/XMLError.jsp"; + private String errorJsp = "/exception/HTMLError.jsp"; + private String imageErrorJsp = "/exception/HTMLError.jsp"; + private String javascriptErrorJsp = "/exception/JavaScriptError.jsp"; + private String cssErrorJsp = "/exception/CSSError.jsp"; protected final Pattern IMAGE_REGEX = Pattern .compile(".*\\.(jpg|jpeg|gif|png|bmp|tiff|tif)$"); /* ERROR HANDLING RESPONSES: */ - private boolean requestIsEmbedded(HttpServletRequest 
httpRequest, + protected boolean requestIsEmbedded(HttpServletRequest httpRequest, WaybackRequest wbRequest) { // without a wbRequest, assume it is not embedded: send back HTML if (wbRequest == null) { @@ -73,7 +73,7 @@ return (referer != null && referer.length() > 0); } - private boolean requestIsImage(HttpServletRequest httpRequest, + protected boolean requestIsImage(HttpServletRequest httpRequest, WaybackRequest wbRequest) { String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); if (requestUrl == null) @@ -82,34 +82,30 @@ return (matcher != null && matcher.matches()); } - private boolean requestIsJavascript(HttpServletRequest httpRequest, + protected boolean requestIsJavascript(HttpServletRequest httpRequest, WaybackRequest wbRequest) { String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); return (requestUrl != null) && requestUrl.endsWith(".js"); } - private boolean requestIsCSS(HttpServletRequest httpRequest, + protected boolean requestIsCSS(HttpServletRequest httpRequest, WaybackRequest wbRequest) { String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); return (requestUrl != null) && requestUrl.endsWith(".css"); } - /* (non-Javadoc) - * @see org.archive.wayback.ExceptionRenderer#renderException(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.exception.WaybackException) - */ - public void renderException(HttpServletRequest httpRequest, + public String getExceptionHandler(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - WaybackException exception) throws ServletException, IOException { - + WaybackException exception) { // the "standard HTML" response handler: - String finalJspPath = errorJsp; + String jspPath = errorJsp; if(wbRequest.isQueryRequest()) { if(wbRequest.containsKey(WaybackConstants.REQUEST_XML_DATA)) { - finalJspPath = xmlErrorJsp; + jspPath = xmlErrorJsp; } } else if 
(requestIsEmbedded(httpRequest, wbRequest)) { @@ -121,27 +117,38 @@ if (requestIsJavascript(httpRequest, wbRequest)) { - finalJspPath = javascriptErrorJsp; + jspPath = javascriptErrorJsp; } else if (requestIsCSS(httpRequest, wbRequest)) { - finalJspPath = cssErrorJsp; + jspPath = cssErrorJsp; } else if (requestIsImage(httpRequest, wbRequest)) { - finalJspPath = imageErrorJsp; + jspPath = imageErrorJsp; } } + return jspPath; + } + /* (non-Javadoc) + * @see org.archive.wayback.ExceptionRenderer#renderException(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.exception.WaybackException) + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) throws ServletException, IOException { + String jspPath = getExceptionHandler(httpRequest, httpResponse, + wbRequest, exception); + httpRequest.setAttribute("exception", exception); - UIResults uiResults = new UIResults(wbRequest); - uiResults.storeInRequest(httpRequest, finalJspPath); + UIResults uiResults = new UIResults(wbRequest,null); + uiResults.storeInRequest(httpRequest, jspPath); RequestDispatcher dispatcher = httpRequest - .getRequestDispatcher(finalJspPath); + .getRequestDispatcher(jspPath); if(dispatcher == null) { - throw new ServletException("Null dispatcher for " + finalJspPath); + throw new ServletException("Null dispatcher for " + jspPath); } dispatcher.forward(httpRequest, httpResponse); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2371 http://archive-access.svn.sourceforge.net/archive-access/?rev=2371&view=rev Author: bradtofel Date: 2008-07-01 16:41:30 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java 2008-07-01 23:41:12 UTC (rev 2370) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java 2008-07-01 23:41:30 UTC (rev 2371) @@ -26,8 +26,8 @@ import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.TransparentReplayRenderer; @@ -46,7 +46,7 @@ * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ public ReplayRenderer getRenderer(WaybackRequest wbRequest, - SearchResult result, Resource resource) { + CaptureSearchResult result, Resource resource) { // always use the transparent: return renderer; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:41:03
|
Revision: 2370 http://archive-access.svn.sourceforge.net/archive-access/?rev=2370&view=rev Author: bradtofel Date: 2008-07-01 16:41:12 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-07-01 23:40:50 UTC (rev 2369) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-07-01 23:41:12 UTC (rev 2370) @@ -27,8 +27,8 @@ import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.DateRedirectReplayRenderer; import org.archive.wayback.replay.TransparentReplayRenderer; @@ -56,12 +56,12 @@ * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ public ReplayRenderer getRenderer(WaybackRequest wbRequest, - SearchResult result, Resource resource) { + CaptureSearchResult result, Resource resource) { // if the result is not for the exact date requested, redirect to the // exact date. 
some capture dates are not 14 digits, only compare as // many digits as are in the result date: String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE); - String resDateStr = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + String resDateStr = result.getCaptureTimestamp(); if((resDateStr.length() > reqDateStr.length()) || !resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { return redirect; @@ -70,12 +70,10 @@ // HTML and XHTML docs smaller than some size get marked up as HTML if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { - if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( - TEXT_HTML_MIME)) { + if (-1 != result.getMimeType().indexOf(TEXT_HTML_MIME)) { return html; } - if (-1 != result.get(WaybackConstants.RESULT_MIME_TYPE).indexOf( - TEXT_XHTML_MIME)) { + if (-1 != result.getMimeType().indexOf(TEXT_XHTML_MIME)) { return html; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayRenderer.java 2008-07-01 23:41:12 UTC (rev 2370) @@ -36,8 +36,8 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; @@ -69,8 +69,8 @@ */ public void 
renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); @@ -82,7 +82,7 @@ HTMLPage page = new HTMLPage(resource,result,uriConverter); page.readFully(); - String resourceTS = result.getCaptureDate(); + String resourceTS = result.getCaptureTimestamp(); String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); @@ -108,7 +108,7 @@ * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, SearchResult result) { + ResultURIConverter uriConverter, CaptureSearchResult result) { String keyUp = key.toUpperCase(); // omit Content-Length header @@ -119,12 +119,13 @@ // rewrite Location header URLs if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP)) { - String baseUrl = result.getAbsoluteUrl(); - String cd = result.getCaptureDate(); + String baseUrl = result.getOriginalUrl(); + String resourceTS = result.getCaptureTimestamp(); + String captureTS = Timestamp.parseBefore(resourceTS).getDateStr(); // by the spec, these should be absolute already, but just in case: String u = UrlOperations.resolveUrl(baseUrl, value); - output.put(key, uriConverter.makeReplayURI(cd,u)); + output.put(key, uriConverter.makeReplayURI(captureTS,u)); } else { // others go out as-is: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:40:40
|
Revision: 2369 http://archive-access.svn.sourceforge.net/archive-access/?rev=2369&view=rev Author: bradtofel Date: 2008-07-01 16:40:50 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlASXReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -9,8 +9,8 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; @@ -22,8 +22,8 @@ */ public void 
renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlCSSReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -9,8 +9,8 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; @@ -22,8 +22,8 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -30,7 +30,7 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.DateRedirectReplayRenderer; @@ -71,13 +71,13 @@ * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ public ReplayRenderer getRenderer(WaybackRequest wbRequest, - SearchResult result, Resource resource) { + CaptureSearchResult result, Resource resource) { // if the result is not for the exact date requested, redirect to the // exact date. 
some capture dates are not 14 digits, only compare as // many digits as are in the result date: - String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_EXACT_DATE); - String resDateStr = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + String reqDateStr = wbRequest.get(WaybackConstants.REQUEST_DATE); + String resDateStr = result.getCaptureTimestamp(); if(!resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { return redirect; } @@ -85,7 +85,7 @@ // only bother attempting markup on pages smaller than some size: if (resource.getRecordLength() < MAX_HTML_MARKUP_LENGTH) { - String resultMime = result.get(WaybackConstants.RESULT_MIME_TYPE); + String resultMime = result.getMimeType(); // HTML and XHTML docs get marked up as HTML if (-1 != resultMime.indexOf(TEXT_HTML_MIME)) { return archivalHTML; @@ -100,7 +100,7 @@ if (-1 != resultMime.indexOf(ASX_MIME)) { return archivalASX; } - String resultPath = result.get(WaybackConstants.RESULT_URL_KEY); + String resultPath = result.getUrlKey(); resultPath = resultPath.substring(resultPath.indexOf('/')); int queryIdx = resultPath.indexOf('?'); if(queryIdx > 0) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -36,8 +36,8 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import 
org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.HTMLPage; @@ -67,8 +67,8 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { StringBuilder toInsert = new StringBuilder(300); @@ -98,7 +98,7 @@ Iterator<String> itr = jspInserts.iterator(); while(itr.hasNext()) { toInsert.append(page.includeJspString(itr.next(), httpRequest, - httpResponse, wbRequest, results, result)); + httpResponse, wbRequest, results, result, resource)); } } @@ -129,7 +129,7 @@ * @see org.archive.wayback.replay.HttpHeaderProcessor#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, SearchResult result) { + ResultURIConverter uriConverter, CaptureSearchResult result) { String keyUp = key.toUpperCase(); @@ -142,8 +142,8 @@ if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { - String baseUrl = result.getAbsoluteUrl(); - String cd = result.getCaptureDate(); + String baseUrl = result.getOriginalUrl(); + String cd = result.getCaptureTimestamp(); // by the spec, these should be absolute already, but just in case: String u = UrlOperations.resolveUrl(baseUrl, value); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2008-07-01 23:40:18 UTC (rev 2368) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlTransparentReplayRenderer.java 2008-07-01 23:40:50 UTC (rev 2369) @@ -3,7 +3,7 @@ import java.util.Map; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.replay.TransparentReplayRenderer; import org.archive.wayback.util.url.UrlOperations; @@ -21,7 +21,7 @@ * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, SearchResult result) { + ResultURIConverter uriConverter, CaptureSearchResult result) { String keyUp = key.toUpperCase(); @@ -29,8 +29,8 @@ if (keyUp.startsWith(HTTP_LOCATION_HEADER_UP) || keyUp.startsWith(HTTP_CONTENT_BASE_HEADER_UP)) { - String baseUrl = result.getAbsoluteUrl(); - String cd = result.getCaptureDate(); + String baseUrl = result.getOriginalUrl(); + String cd = result.getCaptureTimestamp(); // by the spec, these should be absolute already, but just in case: String u = UrlOperations.resolveUrl(baseUrl, value); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:40:09
|
Revision: 2368 http://archive-access.svn.sourceforge.net/archive-access/?rev=2368&view=rev Author: bradtofel Date: 2008-07-01 16:40:18 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult INTERFACE: changed names of two render* methods to match Capture/Url scheme. TWEAK: Moved query specific .jsps to /query/ directory in webapp TWEAK: allow separate XML renderers for Url/Capture results. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2008-07-01 23:38:20 UTC (rev 2367) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2008-07-01 23:40:18 UTC (rev 2368) @@ -34,7 +34,8 @@ import org.archive.wayback.QueryRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; /** @@ -47,9 +48,10 @@ */ public class Renderer implements QueryRenderer { - private String captureJsp = "/jsp/HTMLResults.jsp"; - private String urlJsp = "/jsp/HTMLResults.jsp"; - private String xmlJsp = "/jsp/XMLResults.jsp"; + private String captureJsp = "/query/HTMLCaptureResults.jsp"; + private String urlJsp = "/query/HTMLUrlResults.jsp"; + private String xmlCaptureJsp = "/query/XMLCaptureResults.jsp"; + private String xmlUrlJsp = "/query/XMLUrlResults.jsp"; /** * @param request @@ -66,16 +68,16 @@ dispatcher.forward(request, response); } - public void 
renderUrlResults(HttpServletRequest httpRequest, + public void renderCaptureResults(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResults results, ResultURIConverter uriConverter) + CaptureSearchResults results, ResultURIConverter uriConverter) throws ServletException, IOException { - UIQueryResults uiResults = new UIQueryResults(httpRequest, wbRequest, + UICaptureQueryResults uiResults = new UICaptureQueryResults(httpRequest, wbRequest, results, uriConverter); String jsp = captureJsp; if(wbRequest.containsKey(WaybackConstants.REQUEST_XML_DATA)) { - jsp = xmlJsp; + jsp = xmlCaptureJsp; } uiResults.storeInRequest(httpRequest,jsp); @@ -86,16 +88,16 @@ /* (non-Javadoc) * @see org.archive.wayback.QueryRenderer#renderUrlPrefixResults(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResults, org.archive.wayback.ResultURIConverter) */ - public void renderUrlPrefixResults(HttpServletRequest httpRequest, + public void renderUrlResults(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResults results, ResultURIConverter uriConverter) + UrlSearchResults results, ResultURIConverter uriConverter) throws ServletException, IOException { - UIQueryResults uiResults = new UIQueryResults(httpRequest, wbRequest, + UIUrlQueryResults uiResults = new UIUrlQueryResults(httpRequest, wbRequest, results, uriConverter); String jsp = urlJsp; if(wbRequest.containsKey(WaybackConstants.REQUEST_XML_DATA)) { - jsp = xmlJsp; + jsp = xmlUrlJsp; } uiResults.storeInRequest(httpRequest,jsp); @@ -132,16 +134,29 @@ } /** - * @return the xmlJsp + * @return the xmlCaptureJsp */ - public String getXmlJsp() { - return xmlJsp; + public String getXmlCaptureJsp() { + return xmlCaptureJsp; } /** - * @param xmlJsp the xmlJsp to set + * @param xmlCaptureJsp the xmlCaptureJsp to set */ - public void 
setXmlJsp(String xmlJsp) { - this.xmlJsp = xmlJsp; + public void setXmlCaptureJsp(String xmlCaptureJsp) { + this.xmlCaptureJsp = xmlCaptureJsp; } + /** + * @return the xmlUrlJsp + */ + public String getXmlUrlJsp() { + return xmlUrlJsp; + } + + /** + * @param xmlUrlJsp the xmlUrlJsp to set + */ + public void setXmlUrlJsp(String xmlUrlJsp) { + this.xmlUrlJsp = xmlUrlJsp; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:38:11
|
Revision: 2367 http://archive-access.svn.sourceforge.net/archive-access/?rev=2367&view=rev Author: bradtofel Date: 2008-07-01 16:38:20 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java 2008-07-01 23:36:18 UTC (rev 2366) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java 2008-07-01 23:38:20 UTC (rev 2367) @@ -30,7 +30,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; /** @@ -52,9 +53,9 @@ * @throws ServletException * @throws IOException */ - public void renderUrlResults(HttpServletRequest httpRequest, + public void renderCaptureResults(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResults results, ResultURIConverter uriConverter) + CaptureSearchResults results, ResultURIConverter uriConverter) throws ServletException, IOException; /** Show the SearchResults of the request which may have resulted in @@ -69,9 +70,9 @@ * @throws 
ServletException * @throws IOException */ - public void renderUrlPrefixResults(HttpServletRequest httpRequest, + public void renderUrlResults(HttpServletRequest httpRequest, HttpServletResponse response, WaybackRequest wbRequest, - SearchResults results, ResultURIConverter uriConverter) + UrlSearchResults results, ResultURIConverter uriConverter) throws ServletException, IOException; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java 2008-07-01 23:36:18 UTC (rev 2366) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java 2008-07-01 23:38:20 UTC (rev 2367) @@ -25,7 +25,7 @@ package org.archive.wayback; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.WaybackRequest; /** @@ -45,5 +45,5 @@ * @return the correct ReplayRenderer for the Resource */ public ReplayRenderer getRenderer(WaybackRequest wbRequest, - SearchResult result, Resource resource); + CaptureSearchResult result, Resource resource); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java 2008-07-01 23:36:18 UTC (rev 2366) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayRenderer.java 2008-07-01 23:38:20 UTC (rev 2367) @@ -31,8 +31,8 @@ import javax.servlet.http.HttpServletResponse; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import 
org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.WaybackException; @@ -50,11 +50,11 @@ * @param httpRequest the HttpServletRequest * @param httpResponse the HttpServletResponse * @param wbRequest the WaybackRequest that returned the results - * @param result actual SearchResult that maps to resource to replay + * @param result actual CaptureSearchResult that maps to resource to replay * @param resource resource to replay * @param uriConverter the URI converter to use to translate matching * results into replayable URLs - * @param results all SearchResults that were returned from the + * @param results all CaptureSearchResults that were returned from the * ResourceIndex, probably including other capture * dates of the same URL. * @throws ServletException @@ -63,7 +63,7 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, WaybackException; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java 2008-07-01 23:36:18 UTC (rev 2366) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ResourceStore.java 2008-07-01 23:38:20 UTC (rev 2367) @@ -26,7 +26,7 @@ import java.io.IOException; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import 
org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceNotAvailableException; /** @@ -44,7 +44,7 @@ * @throws IOException * @throws ResourceNotAvailableException */ - public Resource retrieveResource(SearchResult result) throws IOException, + public Resource retrieveResource(CaptureSearchResult result) throws IOException, ResourceNotAvailableException; public void shutdown() throws IOException; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:36:10
|
Revision: 2366 http://archive-access.svn.sourceforge.net/archive-access/?rev=2366&view=rev Author: bradtofel Date: 2008-07-01 16:36:18 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java 2008-07-01 23:34:52 UTC (rev 2365) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/DateRedirectReplayRenderer.java 2008-07-01 23:36:18 UTC (rev 2366) @@ -33,8 +33,8 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; /** @@ -50,13 +50,13 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter 
uriConverter, CaptureSearchResults results) throws ServletException, IOException { // redirect to the better version: - String url = result.getAbsoluteUrl(); - String captureDate = result.getCaptureDate(); + String url = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); String betterURI = uriConverter.makeReplayURI(captureDate,url); httpResponse.sendRedirect(betterURI); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java 2008-07-01 23:34:52 UTC (rev 2365) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderOperation.java 2008-07-01 23:36:18 UTC (rev 2366) @@ -32,7 +32,7 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.BadContentException; /** @@ -70,7 +70,7 @@ * @return */ public static Map<String,String> processHeaders(Resource resource, - SearchResult result, ResultURIConverter uriConverter, + CaptureSearchResult result, ResultURIConverter uriConverter, HttpHeaderProcessor filter) { HashMap<String,String> output = new HashMap<String,String>(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java 2008-07-01 23:34:52 UTC (rev 2365) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HttpHeaderProcessor.java 2008-07-01 23:36:18 UTC (rev 2366) @@ 
-27,7 +27,7 @@ import java.util.Map; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; /** * @@ -63,5 +63,5 @@ * @param result */ public void filter(Map<String,String> output, String key, String value, - final ResultURIConverter uriConverter, SearchResult result); + final ResultURIConverter uriConverter, CaptureSearchResult result); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java 2008-07-01 23:34:52 UTC (rev 2365) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TransparentReplayRenderer.java 2008-07-01 23:36:18 UTC (rev 2366) @@ -35,8 +35,8 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; @@ -57,8 +57,8 @@ */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, - SearchResult result, Resource resource, - ResultURIConverter uriConverter, SearchResults results) + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); @@ -80,7 +80,7 @@ * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, 
java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, - ResultURIConverter uriConverter, SearchResult result) { + ResultURIConverter uriConverter, CaptureSearchResult result) { // copy all HTTP headers, as-is. output.put(key, value); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:34:43
|
Revision: 2365 http://archive-access.svn.sourceforge.net/archive-access/?rev=2365&view=rev Author: bradtofel Date: 2008-07-01 16:34:52 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -29,7 +29,7 @@ import java.util.Comparator; import java.util.List; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.cdx.CDXIndex; import org.archive.wayback.util.CloseableIterator; @@ -67,12 +67,12 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> 
getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { - Comparator<SearchResult> comparator = new SearchResultComparator(); - CompositeSortedIterator<SearchResult> itr = - new CompositeSortedIterator<SearchResult>(comparator); + Comparator<CaptureSearchResult> comparator = new SearchResultComparator(); + CompositeSortedIterator<CaptureSearchResult> itr = + new CompositeSortedIterator<CaptureSearchResult>(comparator); for (int i = 0; i < sources.size(); i++) { itr.addComponent(sources.get(i).getPrefixIterator(prefix)); } @@ -84,12 +84,12 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator( + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator( String prefix) throws ResourceIndexNotAvailableException { - Comparator<SearchResult> comparator = new SearchResultComparator(true); - CompositeSortedIterator<SearchResult> itr = - new CompositeSortedIterator<SearchResult>(comparator); + Comparator<CaptureSearchResult> comparator = new SearchResultComparator(true); + CompositeSortedIterator<CaptureSearchResult> itr = + new CompositeSortedIterator<CaptureSearchResult>(comparator); for (int i = 0; i < sources.size(); i++) { itr.addComponent(sources.get(i).getPrefixReverseIterator(prefix)); } @@ -99,7 +99,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException{ + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException{ c.close(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -2,8 +2,7 @@ import java.util.HashMap; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; /** @@ -25,46 +24,38 @@ * @version $Date$, $Revision$ */ public class DeduplicationSearchResultAnnotationAdapter -implements Adapter<SearchResult,SearchResult> { +implements Adapter<CaptureSearchResult,CaptureSearchResult> { private final static String EMPTY_VALUE = "-"; - // these fields are all copied to deduped records as-is: - private final static String FIELDS[] = { - WaybackConstants.RESULT_ARC_FILE, - WaybackConstants.RESULT_OFFSET, - WaybackConstants.RESULT_HTTP_CODE, - WaybackConstants.RESULT_MIME_TYPE, - WaybackConstants.RESULT_REDIRECT_URL, - }; - private HashMap<String,SearchResult> memory = null; + private HashMap<String,CaptureSearchResult> memory = null; public DeduplicationSearchResultAnnotationAdapter() { - memory = new HashMap<String,SearchResult>(); + memory = new HashMap<String,CaptureSearchResult>(); } - private SearchResult annotate(SearchResult o) { - String thisDigest = o.get(WaybackConstants.RESULT_MD5_DIGEST); - SearchResult last = memory.get(thisDigest); + private CaptureSearchResult annotate(CaptureSearchResult o) { + String thisDigest = o.getDigest(); + CaptureSearchResult last = memory.get(thisDigest); if(last == null) { + // TODO: log missing record digest reference return null; } - for(String field : FIELDS) { - o.put(field, last.get(field)); - } - o.put(WaybackConstants.RESULT_DUPLICATE_ANNOTATION, - WaybackConstants.RESULT_DUPLICATE_DIGEST); - 
o.put(WaybackConstants.RESULT_DUPLICATE_STORED_DATE, - last.get(WaybackConstants.RESULT_CAPTURE_DATE)); + o.setFile(last.getFile()); + o.setOffset(last.getOffset()); + o.setHttpCode(last.getHttpCode()); + o.setMimeType(last.getMimeType()); + o.setRedirectUrl(last.getRedirectUrl()); + o.flagDuplicateDigest(last.getCaptureTimestamp()); return o; } - private SearchResult remember(SearchResult o) { - memory.put(o.get(WaybackConstants.RESULT_MD5_DIGEST),o); + private CaptureSearchResult remember(CaptureSearchResult o) { + memory.put(o.getDigest(),o); return o; } - public SearchResult adapt(SearchResult o) { - if(o.get(FIELDS[0]).equals(EMPTY_VALUE)) { + public CaptureSearchResult adapt(CaptureSearchResult o) { + if(o.getFile().equals(EMPTY_VALUE)) { return annotate(o); } return remember(o); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -26,8 +26,7 @@ import java.util.Comparator; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; /** * @@ -35,7 +34,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class SearchResultComparator implements Comparator<SearchResult> { +public class SearchResultComparator implements Comparator<CaptureSearchResult> { private boolean backwards; /** @@ -52,15 +51,15 @@ backwards = false; } - private String objectToKey(SearchResult r) { - String urlKey = r.get(WaybackConstants.RESULT_URL_KEY); - String captureDate = 
r.get(WaybackConstants.RESULT_CAPTURE_DATE); + private String objectToKey(CaptureSearchResult r) { + String urlKey = r.getUrlKey(); + String captureDate = r.getCaptureTimestamp(); return urlKey + " " + captureDate; } /* (non-Javadoc) * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ - public int compare(SearchResult o1, SearchResult o2) { + public int compare(CaptureSearchResult o1, CaptureSearchResult o2) { String k1 = objectToKey(o1); String k2 = objectToKey(o2); if(backwards) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -26,7 +26,7 @@ import java.io.IOException; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.util.CloseableIterator; @@ -44,7 +44,7 @@ * results. * @throws ResourceIndexNotAvailableException */ - public CloseableIterator<SearchResult> getPrefixIterator(final String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(final String prefix) throws ResourceIndexNotAvailableException; /** @@ -54,14 +54,14 @@ * results. 
* @throws ResourceIndexNotAvailableException */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(final String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(final String prefix) throws ResourceIndexNotAvailableException; /** * @param c * @throws IOException */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException; + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException; /** * @param c Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -28,7 +28,7 @@ import java.util.Iterator; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; /** * @@ -37,6 +37,6 @@ * @version $Date$, $Revision$ */ public interface UpdatableSearchResultSource extends SearchResultSource { - public void addSearchResults(Iterator<SearchResult> itr, + public void addSearchResults(Iterator<CaptureSearchResult> itr, UrlCanonicalizer canonicalizer) throws IOException; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:33:26
|
Revision: 2364 http://archive-access.svn.sourceforge.net/archive-access/?rev=2364&view=rev Author: bradtofel Date: 2008-07-01 16:33:35 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -33,7 +33,7 @@ import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import 
org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.UpdatableSearchResultSource; @@ -66,9 +66,10 @@ initializeDB(bdbPath,bdbName); } - private CloseableIterator<SearchResult> adaptIterator( + private CloseableIterator<CaptureSearchResult> adaptIterator( Iterator<BDBRecord> itr) { - return new AdaptedIterator<BDBRecord,SearchResult>(itr,new BDBRecordToSearchResultAdapter()); + return new AdaptedIterator<BDBRecord,CaptureSearchResult>(itr, + new BDBRecordToSearchResultAdapter()); } /* @@ -76,7 +77,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { try { @@ -91,7 +92,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(recordIterator(prefix,false)); @@ -103,20 +104,20 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException { + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException { c.close(); } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.UpdatableSearchResultSource#addSearchResults(java.util.Iterator) */ - public void addSearchResults(Iterator<SearchResult> itr, + public void addSearchResults(Iterator<CaptureSearchResult> itr, UrlCanonicalizer canonicalizer) throws IOException { - Adapter<SearchResult,BDBRecord> 
adapterSRtoBDB = + Adapter<CaptureSearchResult,BDBRecord> adapterSRtoBDB = new SearchResultToBDBRecordAdapter(canonicalizer); Iterator<BDBRecord> itrBDB = - new AdaptedIterator<SearchResult,BDBRecord>(itr, + new AdaptedIterator<CaptureSearchResult,BDBRecord>(itr, adapterSRtoBDB); insertRecords(itrBDB); @@ -158,8 +159,8 @@ if(op.compareTo("-r") == 0) { PrintWriter pw = new PrintWriter(System.out); - CloseableIterator<SearchResult> itrSR = null; - Adapter<SearchResult,String> adapter = + CloseableIterator<CaptureSearchResult> itrSR = null; + Adapter<CaptureSearchResult,String> adapter = new SearchResultToCDXLineAdapter(); CloseableIterator<String> itrS; @@ -171,7 +172,7 @@ e.printStackTrace(); System.exit(1); } - itrS = new AdaptedIterator<SearchResult,String>(itrSR,adapter); + itrS = new AdaptedIterator<CaptureSearchResult,String>(itrSR,adapter); while(itrS.hasNext()) { String line = itrS.next(); if(!line.startsWith(prefix)) { @@ -187,7 +188,7 @@ e.printStackTrace(); System.exit(1); } - itrS = new AdaptedIterator<SearchResult,String>(itrSR,adapter); + itrS = new AdaptedIterator<CaptureSearchResult,String>(itrSR,adapter); while(itrS.hasNext()) { pw.println(itrS.next()); @@ -212,11 +213,11 @@ RecordIterator itrS = new RecordIterator(br); - Adapter<String,SearchResult> adapterStoSR = + Adapter<String,CaptureSearchResult> adapterStoSR = new CDXLineToSearchResultAdapter(); - Iterator<SearchResult> itrSR = - new AdaptedIterator<String,SearchResult>(itrS,adapterStoSR); + Iterator<CaptureSearchResult> itrSR = + new AdaptedIterator<String,CaptureSearchResult>(itrS,adapterStoSR); try { index.addSearchResults(itrSR, canonicalizer); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -27,7 +27,7 @@ import java.io.UnsupportedEncodingException; import org.archive.wayback.bdb.BDBRecord; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; import org.archive.wayback.util.Adapter; @@ -38,7 +38,7 @@ * @version $Date$, $Revision$ */ public class BDBRecordToSearchResultAdapter - implements Adapter<BDBRecord,SearchResult> { + implements Adapter<BDBRecord,CaptureSearchResult> { private static int DEFAULT_SB_SIZE = 100; private StringBuilder sb; @@ -53,7 +53,7 @@ * @param record * @return SearchResult representation of input BDBRecord */ - public SearchResult adapt(BDBRecord record) { + public CaptureSearchResult adapt(BDBRecord record) { sb.setLength(0); try { String key = new String(record.getKey().getData(),"UTF-8"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -28,10 +28,9 @@ import org.apache.commons.httpclient.URIException; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.bdb.BDBRecord; 
import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; import com.sleepycat.je.DatabaseEntry; @@ -43,7 +42,7 @@ * @version $Date$, $Revision$ */ public class SearchResultToBDBRecordAdapter implements - Adapter<SearchResult,BDBRecord> { + Adapter<CaptureSearchResult,BDBRecord> { private static final Logger LOGGER = Logger.getLogger(SearchResultToBDBRecordAdapter.class.getName()); @@ -66,11 +65,11 @@ * * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public BDBRecord adapt(SearchResult result) { + public BDBRecord adapt(CaptureSearchResult result) { StringBuilder keySB = new StringBuilder(40); StringBuilder valSB = new StringBuilder(100); - String origUrl = result.getAbsoluteUrl(); + String origUrl = result.getOriginalUrl(); String urlKey; try { urlKey = canonicalizer.urlStringToKey(origUrl); @@ -81,22 +80,22 @@ } keySB.append(urlKey); keySB.append(DELIMITER); - keySB.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + keySB.append(result.getCaptureTimestamp()); keySB.append(DELIMITER); - keySB.append(result.get(WaybackConstants.RESULT_OFFSET)); + keySB.append(result.getOffset()); keySB.append(DELIMITER); - keySB.append(result.get(WaybackConstants.RESULT_ARC_FILE)); + keySB.append(result.getFile()); - valSB.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); + valSB.append(result.getOriginalUrl()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); + valSB.append(result.getMimeType()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); + valSB.append(result.getHttpCode()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); + valSB.append(result.getDigest()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); + valSB.append(result.getRedirectUrl()); 
key.setData(BDBRecordSet.stringToBytes(keySB.toString())); value.setData(BDBRecordSet.stringToBytes(valSB.toString())); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -26,10 +26,9 @@ import java.io.IOException; import java.util.Comparator; +import java.util.Date; import java.util.Iterator; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.SearchResultSource; import org.archive.wayback.util.AdaptedIterator; @@ -50,15 +49,15 @@ */ private static final long serialVersionUID = 1L; - private CloseableIterator<SearchResult> adaptIterator(Iterator<String> itr) { - return new AdaptedIterator<String,SearchResult>(itr, + private CloseableIterator<CaptureSearchResult> adaptIterator(Iterator<String> itr) { + return new AdaptedIterator<String,CaptureSearchResult>(itr, new CDXLineToSearchResultAdapter()); } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(getRecordIterator(prefix)); @@ -70,7 +69,7 @@ /* (non-Javadoc) * @see 
org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(getReverseRecordIterator(prefix)); @@ -82,10 +81,10 @@ /** * @param prefix - * @return Iterator of SearchResults of records starting with prefix + * @return Iterator of CaptureSearchResult of records starting with prefix * @throws IOException */ - public Iterator<SearchResult> getUrlIterator(final String prefix) throws IOException { + public Iterator<CaptureSearchResult> getUrlIterator(final String prefix) throws IOException { return adaptIterator(getRecordIterator(prefix)); } @@ -95,38 +94,36 @@ * @return Iterator of results in closest order to wantTS * @throws IOException */ - public Iterator<SearchResult> getClosestIterator(final String prefix, - final Timestamp wantTS) throws IOException { + public Iterator<CaptureSearchResult> getClosestIterator(final String prefix, + final Date wantDate) throws IOException { - Iterator<SearchResult> forwardItr = adaptIterator(getRecordIterator(prefix)); - Iterator<SearchResult> reverseItr = adaptIterator(getReverseRecordIterator(prefix)); - Comparator<SearchResult> comparator = new TimestampComparator(wantTS); - CompositeSortedIterator<SearchResult> itr = - new CompositeSortedIterator<SearchResult>(comparator); + Iterator<CaptureSearchResult> forwardItr = adaptIterator(getRecordIterator(prefix)); + Iterator<CaptureSearchResult> reverseItr = adaptIterator(getReverseRecordIterator(prefix)); + Comparator<CaptureSearchResult> comparator = new CaptureSRComparator(wantDate); + CompositeSortedIterator<CaptureSearchResult> itr = + new CompositeSortedIterator<CaptureSearchResult>(comparator); itr.addComponent(forwardItr); itr.addComponent(reverseItr); return itr; } - private class TimestampComparator 
implements Comparator<SearchResult> { - private int wantedSSE; + private class CaptureSRComparator implements Comparator<CaptureSearchResult> { + private long wantTime; /** * @param wanted */ - public TimestampComparator(Timestamp wanted) { - wantedSSE = wanted.sse(); + public CaptureSRComparator(Date wanted) { + wantTime = wanted.getTime(); } - private int searchResultToDistance(SearchResult sr) { - String dateStr = sr.get(WaybackConstants.RESULT_CAPTURE_DATE); - Timestamp ts = new Timestamp(dateStr); - return Math.abs(wantedSSE - ts.sse()); + private long searchResultToDistance(CaptureSearchResult sr) { + return Math.abs(wantTime - sr.getCaptureDate().getTime()); } /* (non-Javadoc) * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ - public int compare(SearchResult o1, SearchResult o2) { - int d1 = searchResultToDistance(o1); - int d2 = searchResultToDistance(o2); + public int compare(CaptureSearchResult o1, CaptureSearchResult o2) { + long d1 = searchResultToDistance(o1); + long d2 = searchResultToDistance(o2); if(d1 < d2) { return -1; } else if(d1 > d2) { @@ -139,7 +136,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException { + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException { c.close(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ 
-25,76 +25,53 @@ package org.archive.wayback.resourceindex.cdx; -import org.apache.commons.httpclient.URIException; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; /** - * Adapter that converts a CDX record String into a SearchResult + * Adapter that converts a CDX record String into a CaptureSearchResult * * @author brad * @version $Date$, $Revision$ */ -public class CDXLineToSearchResultAdapter implements Adapter<String,SearchResult> { +public class CDXLineToSearchResultAdapter implements Adapter<String,CaptureSearchResult> { - public SearchResult adapt(String line) { + public CaptureSearchResult adapt(String line) { return doAdapt(line); } /** * @param line * @return SearchResult representation of input line */ - public static SearchResult doAdapt(String line) { - SearchResult result = new SearchResult(); + public static CaptureSearchResult doAdapt(String line) { + CaptureSearchResult result = new CaptureSearchResult(); String[] tokens = line.split(" "); if (tokens.length != 9) { return null; //throw new IllegalArgumentException("Need 9 columns("+line+")"); } - String url = tokens[0]; - String captureDate = tokens[1]; - String origHost = tokens[2]; + String urlKey = tokens[0]; + String captureTS = tokens[1]; + String originalUrl = tokens[2]; String mimeType = tokens[3]; - String httpResponseCode = tokens[4]; - String md5Fragment = tokens[5]; + String httpCode = tokens[4]; + String digest = tokens[5]; String redirectUrl = tokens[6]; long compressedOffset = -1; if(!tokens[7].equals("-")) { compressedOffset = Long.parseLong(tokens[7]); } - String arcFileName = tokens[8]; + String fileName = tokens[8]; + result.setUrlKey(urlKey); + result.setCaptureTimestamp(captureTS); + result.setOriginalUrl(originalUrl); + result.setMimeType(mimeType); + 
result.setHttpCode(httpCode); + result.setDigest(digest); + result.setRedirectUrl(redirectUrl); + result.setOffset(compressedOffset); + result.setFile(fileName); - String origUrl = url; - if(!url.startsWith(WaybackConstants.DNS_URL_PREFIX)) { - try { - UURI uri = UURIFactory.getInstance( - WaybackConstants.HTTP_URL_PREFIX + url); - if(uri.getPort() != -1) { - origHost += ":" + uri.getPort(); - } - origUrl = origHost + uri.getEscapedPathQuery(); - } catch (URIException e) { - // TODO Stifle? throw an error? - e.printStackTrace(); - return null; - } - } - - result.put(WaybackConstants.RESULT_URL, origUrl); - result.put(WaybackConstants.RESULT_URL_KEY, url); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, captureDate); - result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); - result.put(WaybackConstants.RESULT_MIME_TYPE, mimeType); - result.put(WaybackConstants.RESULT_HTTP_CODE, httpResponseCode); - result.put(WaybackConstants.RESULT_MD5_DIGEST, md5Fragment); - result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl); - // HACKHACK: - result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(compressedOffset)); - result.put(WaybackConstants.RESULT_ARC_FILE, arcFileName); - return result; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -26,8 +26,7 @@ import java.util.Iterator; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import 
org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; @@ -38,7 +37,7 @@ * @version $Date$, $Revision$ */ public class SearchResultToCDXLineAdapter implements -Adapter<SearchResult,String>{ +Adapter<CaptureSearchResult,String>{ private static int DEFAULT_CAPACITY = 120; private final static String DELIMITER = " "; @@ -46,33 +45,33 @@ /* (non-Javadoc) * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public String adapt(SearchResult result) { + public String adapt(CaptureSearchResult result) { StringBuilder sb = new StringBuilder(DEFAULT_CAPACITY); - sb.append(result.get(WaybackConstants.RESULT_URL_KEY)); + sb.append(result.getUrlKey()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + sb.append(result.getCaptureTimestamp()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); + sb.append(result.getOriginalUrl()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); + sb.append(result.getMimeType()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); + sb.append(result.getHttpCode()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); + sb.append(result.getDigest()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); + sb.append(result.getRedirectUrl()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_OFFSET)); + sb.append(result.getOffset()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_ARC_FILE)); + sb.append(result.getFile()); return sb.toString(); } - public static Iterator<String> adapt(Iterator<SearchResult> input) { - return new AdaptedIterator<SearchResult,String>(input, + public static Iterator<String> adapt(Iterator<CaptureSearchResult> input) { + return new AdaptedIterator<CaptureSearchResult,String>(input, new SearchResultToCDXLineAdapter()); } } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -35,7 +35,7 @@ import java.util.logging.Logger; import java.util.regex.Pattern; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.FileDownloader; @@ -151,7 +151,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { if(getState() != STATE_SYNCHED) { throw new ResourceIndexNotAvailableException("Not synchronized"); @@ -164,7 +164,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { if(getState() != STATE_SYNCHED) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:32:21
|
Revision: 2363 http://archive-access.svn.sourceforge.net/archive-access/?rev=2363&view=rev Author: bradtofel Date: 2008-07-01 16:32:29 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CounterFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateRangeFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/EndDateFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/HostMatchFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/StartDateFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlMatchFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlPrefixMatchFilter.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowEndFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowStartFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CompositeExclusionFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -27,7 +27,7 @@ import java.util.ArrayList; import java.util.Iterator; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -38,22 +38,22 @@ * @author brad * @version $Date$, $Revision$ */ -public class CompositeExclusionFilter implements ObjectFilter<SearchResult> { +public class CompositeExclusionFilter implements ObjectFilter<CaptureSearchResult> { - private ArrayList<ObjectFilter<SearchResult>> filters = - new ArrayList<ObjectFilter<SearchResult>>(); + private ArrayList<ObjectFilter<CaptureSearchResult>> filters = + new ArrayList<ObjectFilter<CaptureSearchResult>>(); /** * @param filter to be added to the composite. 
*/ - public void addComponent(ObjectFilter<SearchResult> filter) { + public void addComponent(ObjectFilter<CaptureSearchResult> filter) { filters.add(filter); } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ - public int filterObject(SearchResult r) { - Iterator<ObjectFilter<SearchResult>> itr = filters.iterator(); + public int filterObject(CaptureSearchResult r) { + Iterator<ObjectFilter<CaptureSearchResult>> itr = filters.iterator(); while(itr.hasNext()) { int result = itr.next().filterObject(r); if(result != FILTER_INCLUDE) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CounterFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CounterFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/CounterFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,7 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -34,7 +34,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class CounterFilter implements ObjectFilter<SearchResult> { +public class CounterFilter implements ObjectFilter<CaptureSearchResult> { private int numMatched = 0; @@ -46,9 +46,9 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { + public int filterObject(CaptureSearchResult r) { numMatched++; return FILTER_INCLUDE; } 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateRangeFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateRangeFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DateRangeFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,8 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Timestamp; import org.archive.wayback.util.ObjectFilter; @@ -36,7 +35,7 @@ * @version $Date$, $Revision$ */ -public class DateRangeFilter implements ObjectFilter<SearchResult> { +public class DateRangeFilter implements ObjectFilter<CaptureSearchResult> { private String first = null; private String last = null; @@ -51,10 +50,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String captureDate = r.get(WaybackConstants.RESULT_CAPTURE_DATE); + public int filterObject(CaptureSearchResult r) { + String captureDate = r.getCaptureTimestamp(); return ((first.compareTo(captureDate) > 0) || (last.compareTo(captureDate) < 0)) ? 
FILTER_EXCLUDE : FILTER_INCLUDE; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -1,22 +1,25 @@ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** * ObjectFilter which omits exact duplicate URL+date records from a stream - * of SearchResults. + * of CaptureSearchResult. * * @author brad * @version $Date$, $Revision$ */ -public class DuplicateRecordFilter implements ObjectFilter<SearchResult> { +public class DuplicateRecordFilter implements ObjectFilter<CaptureSearchResult> { private String lastUrl = null; private String lastDate = null; - public int filterObject(SearchResult o) { - String thisUrl = o.getUrl(); - String thisDate = o.getCaptureDate(); + /* (non-Javadoc) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) + */ + public int filterObject(CaptureSearchResult o) { + String thisUrl = o.getUrlKey(); + String thisDate = o.getCaptureTimestamp(); int result = ObjectFilter.FILTER_INCLUDE; if(lastUrl != null) { if(lastUrl.equals(thisUrl) && thisDate.equals(lastDate)) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/EndDateFilter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/EndDateFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/EndDateFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,8 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Timestamp; import org.archive.wayback.util.ObjectFilter; @@ -39,7 +38,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class EndDateFilter implements ObjectFilter<SearchResult> { +public class EndDateFilter implements ObjectFilter<CaptureSearchResult> { private String endDate = null; /** @@ -50,10 +49,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String captureDate = r.get(WaybackConstants.RESULT_CAPTURE_DATE); + public int filterObject(CaptureSearchResult r) { + String captureDate = r.getCaptureTimestamp(); return (endDate.substring(0,captureDate.length()).compareTo( captureDate) < 0) ? 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/GuardRailFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,7 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -34,7 +34,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class GuardRailFilter implements ObjectFilter<SearchResult> { +public class GuardRailFilter implements ObjectFilter<CaptureSearchResult> { private int maxRecordsToScan = 0; private int recordsScanned = 0; @@ -47,9 +47,9 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { + public int filterObject(CaptureSearchResult r) { recordsScanned++; if(recordsScanned > maxRecordsToScan) { return FILTER_ABORT; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/HostMatchFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/HostMatchFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/HostMatchFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ 
-24,8 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -35,7 +34,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class HostMatchFilter implements ObjectFilter<SearchResult> { +public class HostMatchFilter implements ObjectFilter<CaptureSearchResult> { private String hostname = null; @@ -47,10 +46,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String origHost = r.get(WaybackConstants.RESULT_ORIG_HOST); + public int filterObject(CaptureSearchResult r) { + String origHost = r.getOriginalHost(); return hostname.equals(origHost) ? FILTER_INCLUDE : FILTER_EXCLUDE; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -26,8 +26,7 @@ import java.util.HashMap; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -37,7 +36,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class MimeTypeFilter implements ObjectFilter<SearchResult> { +public class MimeTypeFilter implements 
ObjectFilter<CaptureSearchResult> { private HashMap<String,Integer> validMimes = null; /** @@ -51,10 +50,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String mime = r.get(WaybackConstants.RESULT_MIME_TYPE).toLowerCase(); + public int filterObject(CaptureSearchResult r) { + String mime = r.getMimeType().toLowerCase(); return validMimes.containsKey(mime) ? FILTER_INCLUDE : FILTER_EXCLUDE; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -29,8 +29,7 @@ import org.archive.accesscontrol.AccessControlClient; import org.archive.accesscontrol.RuleOracleUnavailableException; import org.archive.accesscontrol.model.Rule; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -40,17 +39,17 @@ * @author brad * @version $Date$, $Revision$ */ -public class OracleAnnotationFilter implements ObjectFilter<SearchResult> { +public class OracleAnnotationFilter implements ObjectFilter<CaptureSearchResult> { private AccessControlClient client = null; private String oracleUrl = null; private String who = null; /* (non-Javadoc) * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int 
filterObject(SearchResult o) { + public int filterObject(CaptureSearchResult o) { if(client != null) { - String url = o.getAbsoluteUrl(); - Date capDate = Timestamp.parseAfter(o.getCaptureDate()).getDate(); + String url = o.getOriginalUrl(); + Date capDate = o.getCaptureDate(); try { Rule r = client.getRule(url, capDate, new Date(), who); if(r != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/SelfRedirectFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -26,8 +26,7 @@ import org.apache.commons.httpclient.URIException; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; @@ -38,22 +37,22 @@ * @author brad * @version $Date$, $Revision$ */ -public class SelfRedirectFilter implements ObjectFilter<SearchResult> { +public class SelfRedirectFilter implements ObjectFilter<CaptureSearchResult> { private UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); public SelfRedirectFilter() { canonicalizer = new AggressiveUrlCanonicalizer(); } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String httpCode = r.get(WaybackConstants.RESULT_HTTP_CODE); + public int 
filterObject(CaptureSearchResult r) { + String httpCode = r.getHttpCode(); // only filter real 3XX http response codes: if(httpCode.startsWith("3")) { - String redirect = r.get(WaybackConstants.RESULT_REDIRECT_URL); + String redirect = r.getRedirectUrl(); if(redirect.compareTo("-") != 0) { - String urlKey = r.get(WaybackConstants.RESULT_URL_KEY); + String urlKey = r.getUrlKey(); try { String redirectKey = canonicalizer.urlStringToKey(redirect); if(redirectKey.compareTo(urlKey) == 0) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/StartDateFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/StartDateFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/StartDateFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,8 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.Timestamp; import org.archive.wayback.util.ObjectFilter; @@ -39,7 +38,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class StartDateFilter implements ObjectFilter<SearchResult> { +public class StartDateFilter implements ObjectFilter<CaptureSearchResult> { private String startDate = null; @@ -52,10 +51,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String captureDate = r.get(WaybackConstants.RESULT_CAPTURE_DATE); + public int filterObject(CaptureSearchResult r) { + String 
captureDate = r.getCaptureTimestamp(); return (startDate.substring(0,captureDate.length()).compareTo( captureDate) > 0) ? FILTER_ABORT : FILTER_INCLUDE; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlMatchFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlMatchFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlMatchFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,8 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -35,7 +34,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class UrlMatchFilter implements ObjectFilter<SearchResult> { +public class UrlMatchFilter implements ObjectFilter<CaptureSearchResult> { private String url = null; @@ -47,10 +46,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String resultUrl = r.get(WaybackConstants.RESULT_URL_KEY); + public int filterObject(CaptureSearchResult r) { + String resultUrl = r.getUrlKey(); return url.equals(resultUrl) ? 
FILTER_INCLUDE : FILTER_ABORT; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlPrefixMatchFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlPrefixMatchFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UrlPrefixMatchFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,8 +24,7 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -37,7 +36,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class UrlPrefixMatchFilter implements ObjectFilter<SearchResult> { +public class UrlPrefixMatchFilter implements ObjectFilter<CaptureSearchResult> { private String prefix; @@ -49,10 +48,10 @@ } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { - String resultUrl = r.get(WaybackConstants.RESULT_URL_KEY); + public int filterObject(CaptureSearchResult r) { + String resultUrl = r.getUrlKey(); return resultUrl.startsWith(prefix) ? 
FILTER_INCLUDE : FILTER_ABORT; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowEndFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowEndFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowEndFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,7 +24,6 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -33,10 +32,11 @@ * @author brad * @version $Date$, $Revision$ */ -public class WindowEndFilter implements ObjectFilter<SearchResult> { +public class WindowEndFilter<T> implements ObjectFilter<T> { private int windowSize = 0; private int numSeen = 0; + private int numReturned = 0; /** * @param windowSize int number of records to include @@ -45,13 +45,16 @@ this.windowSize = windowSize; this.numSeen = 0; } - + public int getNumReturned() { + return numReturned; + } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { + public int filterObject(T r) { numSeen++; if(numSeen <= windowSize) { + numReturned++; return FILTER_INCLUDE; } return FILTER_EXCLUDE; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowStartFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowStartFilter.java 2008-07-01 23:31:37 UTC (rev 2362) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/WindowStartFilter.java 2008-07-01 23:32:29 UTC (rev 2363) @@ -24,7 +24,6 @@ */ package org.archive.wayback.resourceindex.filters; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -33,7 +32,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class WindowStartFilter implements ObjectFilter<SearchResult> { +public class WindowStartFilter<T> implements ObjectFilter<T> { private int windowStart = 0; private int numSeen = 0; @@ -45,11 +44,14 @@ this.windowStart = windowStart; this.numSeen = 0; } + public int getNumSeen() { + return numSeen; + } /* (non-Javadoc) - * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) */ - public int filterObject(SearchResult r) { + public int filterObject(T r) { numSeen++; if(numSeen > windowStart) { return FILTER_INCLUDE; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:31:28
|
Revision: 2362 http://archive-access.svn.sourceforge.net/archive-access/?rev=2362&view=rev Author: bradtofel Date: 2008-07-01 16:31:37 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java 2008-07-01 23:29:31 UTC (rev 2361) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java 2008-07-01 23:31:37 UTC (rev 2362) @@ -38,7 +38,7 @@ import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.InputStreamRequestEntity; import org.apache.commons.httpclient.methods.PutMethod; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; @@ -137,7 +137,7 @@ * @throws HttpException * @throws IOException */ - public boolean addSearchResults(String base, Iterator<SearchResult> itr) + public boolean addSearchResults(String base, Iterator<CaptureSearchResult> itr) throws HttpException, IOException { if(tmpDir == null) { @@ -155,10 +155,10 @@ BufferedOutputStream bos = new BufferedOutputStream(os); PrintWriter pw = new PrintWriter(bos); - Adapter<SearchResult,String> adapterSRtoS = + 
Adapter<CaptureSearchResult,String> adapterSRtoS = new SearchResultToCDXLineAdapter(); Iterator<String> itrS = - new AdaptedIterator<SearchResult,String>(itr,adapterSRtoS); + new AdaptedIterator<CaptureSearchResult,String>(itr,adapterSRtoS); while(itrS.hasNext()) { pw.println(itrS.next()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java 2008-07-01 23:29:31 UTC (rev 2361) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java 2008-07-01 23:31:37 UTC (rev 2362) @@ -28,7 +28,7 @@ import java.io.IOException; import java.util.logging.Logger; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.resourceindex.LocalResourceIndex; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; @@ -97,8 +97,8 @@ boolean added = false; try { FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); - AdaptedIterator<String,SearchResult> searchResultItr = - new AdaptedIterator<String,SearchResult>( + AdaptedIterator<String,CaptureSearchResult> searchResultItr = + new AdaptedIterator<String,CaptureSearchResult>( ffile.getSequentialIterator(), new CDXLineToSearchResultAdapter()); index.addSearchResults(searchResultItr); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:29:23
|
Revision: 2361 http://archive-access.svn.sourceforge.net/archive-access/?rev=2361&view=rev Author: bradtofel Date: 2008-07-01 16:29:31 -0700 (Tue, 01 Jul 2008) Log Message: ----------- FEATURE: added urlToHost() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2008-07-01 23:27:49 UTC (rev 2360) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2008-07-01 23:29:31 UTC (rev 2361) @@ -16,6 +16,29 @@ */ public class UrlOperations { + public final static String DNS_SCHEME = "dns:"; + public final static String HTTP_SCHEME = "http://"; + public final static String HTTPS_SCHEME = "https://"; + public final static String FTP_SCHEME = "ftp://"; + public final static String MMS_SCHEME = "mms://"; + public final static String RTSP_SCHEME = "rtsp://"; + // go brewster + public final static String WAIS_SCHEME = "wais://"; + + public final static String ALL_SCHEMES[] = { + HTTP_SCHEME, + HTTPS_SCHEME, + FTP_SCHEME, + MMS_SCHEME, + RTSP_SCHEME, + WAIS_SCHEME + }; + + + public final static char PORT_SEPARATOR = ':'; + public final static char PATH_START = '/'; + + private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" + "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" + "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" + @@ -73,4 +96,32 @@ } return resolvedURI.getEscapedURI(); } + + public static String urlToHost(String url) { + if(url.startsWith("dns:")) 
{ + return url.substring(4); + } + for(String scheme : ALL_SCHEMES) { + if(url.startsWith(scheme)) { + int hostIdx = scheme.length(); + int portIdx = url.indexOf(PORT_SEPARATOR, hostIdx + 1); + int pathIdx = url.indexOf(PATH_START, hostIdx + 1); + if(portIdx == -1 && pathIdx == -1) { + return url.substring(hostIdx); + } + if(portIdx == -1) { + return url.substring(hostIdx,pathIdx); + } + if(pathIdx == -1) { + return url.substring(hostIdx,portIdx); + } + if(pathIdx > portIdx) { + return url.substring(hostIdx,portIdx); + } else { + return url.substring(hostIdx,pathIdx); + } + } + } + return url; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2008-07-01 23:27:49 UTC (rev 2360) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2008-07-01 23:29:31 UTC (rev 2361) @@ -27,5 +27,42 @@ } else { assertTrue("String("+s+") is not an Authority",want == got); } - } + } + public void testUrlToHost() { + assertEquals("foo.com",UrlOperations.urlToHost("dns:foo.com")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/")); + + 
assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path:/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/path:/")); + + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:27:40
|
Revision: 2360 http://archive-access.svn.sourceforge.net/archive-access/?rev=2360&view=rev Author: bradtofel Date: 2008-07-01 16:27:49 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java 2008-07-01 23:27:14 UTC (rev 2359) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java 2008-07-01 23:27:49 UTC (rev 2360) @@ -26,9 +26,8 @@ import java.util.ArrayList; import java.util.Iterator; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; /** * @@ -41,7 +40,7 @@ private String endDateStr = null; // exclusive private String title = null; - private ArrayList<SearchResult> matches = null; + private ArrayList<CaptureSearchResult> matches = null; /** * @return number of SearchResult objects in this partition @@ -61,7 +60,7 @@ this.startDateStr = startDateStr; this.endDateStr = endDateStr; this.title= title; - matches = new ArrayList<SearchResult>(); + matches = new 
ArrayList<CaptureSearchResult>(); } /** @@ -69,12 +68,11 @@ * within the time range of this partition into this partition. * @param results */ - public void filter(SearchResults results) { - Iterator<SearchResult> itr = results.iterator(); + public void filter(CaptureSearchResults results) { + Iterator<CaptureSearchResult> itr = results.iterator(); while(itr.hasNext()) { - SearchResult result = itr.next(); - String captureDate = result.get( - WaybackConstants.RESULT_CAPTURE_DATE); + CaptureSearchResult result = itr.next(); + String captureDate = result.getCaptureTimestamp(); if((captureDate.compareTo(startDateStr) >= 0) && (captureDate.compareTo(endDateStr) < 0)) { matches.add(result); @@ -92,7 +90,7 @@ /** * @return Returns the matches. */ - public ArrayList<SearchResult> getMatches() { + public ArrayList<CaptureSearchResult> getMatches() { return matches; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java 2008-07-01 23:27:14 UTC (rev 2359) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java 2008-07-01 23:27:49 UTC (rev 2360) @@ -30,7 +30,7 @@ import org.archive.util.ArchiveUtils; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; @@ -56,7 +56,7 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> get(SearchResults results, + public static ArrayList<ResultsPartition> get(CaptureSearchResults results, WaybackRequest 
wbRequest) { Timestamp startTS = Timestamp.parseBefore(results.getFilter( WaybackConstants.REQUEST_START_DATE)); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java 2008-07-01 23:27:14 UTC (rev 2359) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java 2008-07-01 23:27:49 UTC (rev 2360) @@ -29,7 +29,7 @@ import org.archive.util.ArchiveUtils; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; @@ -69,7 +69,7 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> getHour(SearchResults results, + public static ArrayList<ResultsPartition> getHour(CaptureSearchResults results, WaybackRequest wbRequest) { return get(hourRP,NUM_HOUR_PARTITIONS,results,wbRequest); } @@ -79,7 +79,7 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> getDay(SearchResults results, + public static ArrayList<ResultsPartition> getDay(CaptureSearchResults results, WaybackRequest wbRequest) { return get(dayRP,NUM_DAY_PARTITIONS,results,wbRequest); } @@ -89,7 +89,7 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> getMonth(SearchResults results, + public static ArrayList<ResultsPartition> getMonth(CaptureSearchResults results, WaybackRequest wbRequest) { return 
get(monthRP,NUM_MONTH_PARTITIONS,results,wbRequest); } @@ -99,7 +99,7 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> getTwoMonth(SearchResults results, + public static ArrayList<ResultsPartition> getTwoMonth(CaptureSearchResults results, WaybackRequest wbRequest) { return get(twoMonthRP,NUM_TWO_MONTH_PARTITIONS,results,wbRequest); } @@ -109,7 +109,7 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> getYear(SearchResults results, + public static ArrayList<ResultsPartition> getYear(CaptureSearchResults results, WaybackRequest wbRequest) { return get(yearRP,NUM_YEAR_PARTITIONS,results,wbRequest); } @@ -119,10 +119,10 @@ * @param wbRequest * @return ArrayList of ResultsPartition objects */ - public static ArrayList<ResultsPartition> getAuto(SearchResults results, + public static ArrayList<ResultsPartition> getAuto(CaptureSearchResults results, WaybackRequest wbRequest) { - int first = Timestamp.parseBefore(results.getFirstResultDate()).sse(); - int last = Timestamp.parseAfter(results.getLastResultDate()).sse(); + int first = Timestamp.parseBefore(results.getFirstResultTimestamp()).sse(); + int last = Timestamp.parseAfter(results.getLastResultTimestamp()).sse(); int diff = last - first; if(diff < MAX_HOUR_SECONDS) { return getHour(results,wbRequest); @@ -140,9 +140,9 @@ * @param results * @return String Constant of minimum resolution that will hold the results */ - public static String getMinResolution(SearchResults results) { - int first = Timestamp.parseBefore(results.getFirstResultDate()).sse(); - int last = Timestamp.parseAfter(results.getLastResultDate()).sse(); + public static String getMinResolution(CaptureSearchResults results) { + int first = Timestamp.parseBefore(results.getFirstResultTimestamp()).sse(); + int last = Timestamp.parseAfter(results.getLastResultTimestamp()).sse(); int diff = last - first; if(diff < 
MAX_HOUR_SECONDS) { return WaybackConstants.REQUEST_RESOLUTION_HOURS; @@ -157,7 +157,7 @@ } private static ArrayList<ResultsPartition> get(ResultsPartitioner - partitioner, int partitionCount, SearchResults results, + partitioner, int partitionCount, CaptureSearchResults results, WaybackRequest wbRequest) { ArrayList<ResultsPartition> partitions = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:27:09
|
Revision: 2359 http://archive-access.svn.sourceforge.net/archive-access/?rev=2359&view=rev Author: bradtofel Date: 2008-07-01 16:27:14 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2008-07-01 23:26:31 UTC (rev 2358) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2008-07-01 23:27:14 UTC (rev 2359) @@ -36,7 +36,7 @@ import org.archive.io.arc.ARCWriterPool; import org.archive.wayback.core.Resource; import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.resourcestore.ResourceFactory; +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; import org.archive.wayback.util.DirMaker; /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-07-01 23:26:31 UTC (rev 2358) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-07-01 23:27:14 UTC (rev 2359) @@ -34,17 +34,17 @@ import 
org.archive.io.arc.ARCRecord; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; -import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; -import org.archive.wayback.resourcestore.ArcResource; +import org.archive.wayback.resourcestore.indexer.ARCRecordToSearchResultAdapter; +import org.archive.wayback.resourcestore.resourcefile.ArcResource; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** @@ -100,8 +100,8 @@ return req; } - private boolean isForgedFailRecentEnough(SearchResult result) { - String captureDate = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + private boolean isForgedFailRecentEnough(CaptureSearchResult result) { + String captureDate = result.getCaptureTimestamp(); Timestamp t = new Timestamp(captureDate); long maxAge = System.currentTimeMillis() - maxFailedCacheMS; long failAge = t.getDate().getTime(); @@ -111,27 +111,25 @@ return false; } - private boolean isForgedFailedSearchResult(SearchResult result) { - String arcFile = result.get(WaybackConstants.RESULT_ARC_FILE); + private boolean isForgedFailedSearchResult(CaptureSearchResult result) { + String arcFile = result.getFile(); return arcFile.equals("-"); } - private SearchResult forgeFailedSearchResult(URL url) { - SearchResult result = new SearchResult(); + private CaptureSearchResult forgeFailedSearchResult(URL url) { + CaptureSearchResult 
result = new CaptureSearchResult(); - result.put(WaybackConstants.RESULT_ARC_FILE, "-"); - result.put(WaybackConstants.RESULT_OFFSET, "0"); + result.setFile("-"); + result.setOffset(0); - result.put(WaybackConstants.RESULT_HTTP_CODE, "0"); + result.setHttpCode("0"); - result.put(WaybackConstants.RESULT_MD5_DIGEST, "-"); - result.put(WaybackConstants.RESULT_MIME_TYPE, "-"); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, - Timestamp.currentTimestamp().getDateStr()); + result.setDigest("-"); + result.setMimeType("-"); + result.setCaptureDate(new Date()); - result.put(WaybackConstants.RESULT_ORIG_HOST, url.getHost()); - result.put(WaybackConstants.RESULT_REDIRECT_URL, "-"); - result.put(WaybackConstants.RESULT_URL, url.toString()); + result.setOriginalUrl(url.toString()); + result.setRedirectUrl("-"); String indexUrl; try { @@ -141,7 +139,7 @@ e.printStackTrace(); indexUrl = url.toString(); } - result.put(WaybackConstants.RESULT_URL_KEY, indexUrl); + result.setUrlKey(indexUrl); return result; } @@ -167,7 +165,7 @@ e.printStackTrace(); throw new IOException(e.getMessage()); } - SearchResult result = results.getClosest(wbRequest); + CaptureSearchResult result = results.getClosest(wbRequest); if(result != null) { if(isForgedFailedSearchResult(result)) { if(isForgedFailRecentEnough(result)) { @@ -178,9 +176,8 @@ throw new ResourceNotInArchiveException("Nope"); } } - String name = (String) result.get(WaybackConstants.RESULT_ARC_FILE); - long offset = Long.parseLong( - (String) result.get(WaybackConstants.RESULT_OFFSET)); + String name = result.getFile(); + long offset = result.getOffset(); resource = arcCacheDir.getResource(name, offset); } return resource; @@ -197,7 +194,7 @@ location = cacher.cache(arcCacheDir, url.toString()); } catch(LiveDocumentNotAvailableException e) { // record the failure, so we can fail early next time: - SearchResult result = forgeFailedSearchResult(url); + CaptureSearchResult result = forgeFailedSearchResult(url); 
index.addSearchResult(result); LOGGER.info("Added FAIL-URL(" + url.toString() + ") to LiveIndex"); throw e; @@ -213,7 +210,7 @@ ArcResource aResource = (ArcResource) resource; ARCRecord record = (ARCRecord) aResource.getArcRecord(); - SearchResult result = adapter.adapt(record); + CaptureSearchResult result = adapter.adapt(record); index.addSearchResult(result); LOGGER.info("Added URL(" + url.toString() + ") in " + "ARC(" + name + ") at (" + offset + ") to LiveIndex"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2008-07-01 23:26:31 UTC (rev 2358) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2008-07-01 23:27:14 UTC (rev 2359) @@ -27,7 +27,7 @@ import java.io.IOException; import java.util.ArrayList; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.LocalResourceIndex; /** @@ -46,10 +46,10 @@ * @throws UnsupportedOperationException */ @SuppressWarnings("unchecked") - public void addSearchResult(SearchResult result) + public void addSearchResult(CaptureSearchResult result) throws UnsupportedOperationException, IOException { - ArrayList<SearchResult> l = new ArrayList<SearchResult>(); + ArrayList<CaptureSearchResult> l = new ArrayList<CaptureSearchResult>(); l.add(result); addSearchResults(l.iterator()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:26:22
|
Revision: 2358 http://archive-access.svn.sourceforge.net/archive-access/?rev=2358&view=rev Author: bradtofel Date: 2008-07-01 16:26:31 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2008-07-01 23:25:55 UTC (rev 2357) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilter.java 2008-07-01 23:26:31 UTC (rev 2358) @@ -28,8 +28,7 @@ import java.util.logging.Logger; import org.apache.commons.httpclient.URIException; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.ObjectFilter; @@ -39,7 +38,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class StaticMapExclusionFilter implements ObjectFilter<SearchResult> { +public class StaticMapExclusionFilter implements ObjectFilter<CaptureSearchResult> { private static final Logger LOGGER = Logger.getLogger( StaticMapExclusionFilter.class.getName()); @@ -76,8 +75,8 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ - public int filterObject(SearchResult r) { - 
String url = r.get(WaybackConstants.RESULT_URL); + public int filterObject(CaptureSearchResult r) { + String url = r.getOriginalUrl(); if(lastChecked != null) { if(lastChecked.equals(url)) { return lastCheckedExcluded ? Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2008-07-01 23:25:55 UTC (rev 2357) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/staticmap/StaticMapExclusionFilterFactory.java 2008-07-01 23:26:31 UTC (rev 2358) @@ -31,7 +31,7 @@ import java.util.logging.Logger; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; @@ -107,7 +107,7 @@ * @param wbRequest * @return SearchResultFilter */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<CaptureSearchResult> get() { if(currentMap == null) { return null; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:25:46
|
Revision: 2357 http://archive-access.svn.sourceforge.net/archive-access/?rev=2357&view=rev Author: bradtofel Date: 2008-07-01 16:25:55 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2008-07-01 23:25:27 UTC (rev 2356) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2008-07-01 23:25:55 UTC (rev 2357) @@ -35,15 +35,14 @@ import java.util.regex.Pattern; import org.archive.util.ArchiveUtils; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.util.ObjectFilter; /** - * SearchResultFilter that uses a LiveWebCache to retrieve robots.txt documents + * CaptureSearchResult Filter that uses a LiveWebCache to retrieve robots.txt documents * from the live web, and filters SearchResults based on the rules therein. 
* * This class caches parsed RobotRules that are retrieved, so using the same @@ -56,7 +55,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class RobotExclusionFilter implements ObjectFilter<SearchResult> { +public class RobotExclusionFilter implements ObjectFilter<CaptureSearchResult> { private final static String HTTP_PREFIX = "http://"; private final static String ROBOT_SUFFIX = "/robots.txt"; @@ -127,10 +126,10 @@ return list; } - private RobotRules getRules(SearchResult result) { + private RobotRules getRules(CaptureSearchResult result) { RobotRules rules = null; RobotRules tmpRules = null; - String host = result.get(WaybackConstants.RESULT_ORIG_HOST); + String host = result.getOriginalHost(); List<String> urlStrings = searchResultToRobotUrlStrings(host); Iterator<String> itr = urlStrings.iterator(); String firstUrlString = null; @@ -174,12 +173,12 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ - public int filterObject(SearchResult r) { + public int filterObject(CaptureSearchResult r) { int filterResult = ObjectFilter.FILTER_EXCLUDE; RobotRules rules = getRules(r); if(rules != null) { - String resultURL = r.get(WaybackConstants.RESULT_URL); + String resultURL = r.getOriginalUrl(); URL url; try { url = new URL(ArchiveUtils.addImpliedHttpIfNecessary(resultURL)); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2008-07-01 23:25:27 UTC (rev 2356) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterFactory.java 2008-07-01 23:25:55 UTC (rev 2357) @@ -25,7 +25,7 @@ 
package org.archive.wayback.accesscontrol.robotstxt; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.util.ObjectFilter; @@ -44,7 +44,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<CaptureSearchResult> get() { return new RobotExclusionFilter(webCache,userAgent,maxCacheMS); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:25:17
|
Revision: 2356 http://archive-access.svn.sourceforge.net/archive-access/?rev=2356&view=rev Author: bradtofel Date: 2008-07-01 16:25:27 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2008-07-01 23:24:50 UTC (rev 2355) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilter.java 2008-07-01 23:25:27 UTC (rev 2356) @@ -32,8 +32,7 @@ import java.net.URLEncoder; import java.util.logging.Logger; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -43,7 +42,7 @@ * @author brad * @version $Date: 2006-10-17 15:21:15 -0700 (Tue, 17 Oct 2006) $, $Revision: 1276 $ */ -public class RemoteExclusionFilter implements ObjectFilter<SearchResult> { +public class RemoteExclusionFilter implements ObjectFilter<CaptureSearchResult> { private static final Logger LOGGER = Logger.getLogger(RemoteExclusionFilter.class .getName()); @@ -90,7 +89,7 @@ finalUrl.append(URL_ARGUMENT); finalUrl.append("="); try { - finalUrl.append(URLEncoder.encode("http://"+urlString,"UTF-8")); + finalUrl.append(URLEncoder.encode(urlString,"UTF-8")); } catch (UnsupportedEncodingException e) { // TODO 
what happenned here? e.printStackTrace(); @@ -146,9 +145,9 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ - public int filterObject(SearchResult r) { - String captureDate = r.get(WaybackConstants.RESULT_CAPTURE_DATE); - String url = r.get(WaybackConstants.RESULT_URL); + public int filterObject(CaptureSearchResult r) { + String captureDate = r.getCaptureTimestamp(); + String url = r.getOriginalUrl(); return isBlocked(url,captureDate) ? FILTER_EXCLUDE : FILTER_INCLUDE; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2008-07-01 23:24:50 UTC (rev 2355) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/remote/RemoteExclusionFilterFactory.java 2008-07-01 23:25:27 UTC (rev 2356) @@ -25,7 +25,7 @@ package org.archive.wayback.accesscontrol.remote; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** @@ -43,7 +43,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public ObjectFilter<SearchResult> get() { + public ObjectFilter<CaptureSearchResult> get() { return new RemoteExclusionFilter(exclusionUrlPrefix, exclusionUserAgent); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |