Revision: 1788 http://archive-access.svn.sourceforge.net/archive-access/?rev=1788&view=rev Author: bradtofel Date: 2007-07-16 16:22:43 -0700 (Mon, 16 Jul 2007) Log Message: ----------- TWEAK: now aware of SearchResults types. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-07-16 23:21:27 UTC (rev 1787) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-07-16 23:22:43 UTC (rev 1788) @@ -34,9 +34,12 @@ import javax.xml.parsers.ParserConfigurationException; import org.archive.wayback.ResourceIndex; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.BadQueryException; @@ -181,9 +184,25 @@ } } } + private String getResultsType(Document document) { + NodeList list = document.getElementsByTagName( + WaybackConstants.RESULTS_TYPE); + if(list.getLength() == 1) { + return list.item(0).getTextContent(); + } else { + return WaybackConstants.RESULTS_TYPE_CAPTURE; + } + } + protected SearchResults documentToSearchResults(Document document) { - SearchResults results = new SearchResults(); + SearchResults results = null; NodeList filters = getRequestFilters(document); + String resultsType = getResultsType(document); + if(resultsType.equals(WaybackConstants.RESULTS_TYPE_CAPTURE)) { + results = new CaptureSearchResults(); + } else { + results = new UrlSearchResults(); + } for(int i = 0; i < filters.getLength(); i++) { String key = filters.item(i).getNodeName(); String value = filters.item(i).getTextContent(); @@ -196,7 +215,7 @@ for(int i = 0; i < xresults.getLength(); i++) { Node xresult = xresults.item(i); SearchResult result = searchElementToSearchResult(xresult); - results.addSearchResult(result); + results.addSearchResultRaw(result,true); } return results; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 1994 http://archive-access.svn.sourceforge.net/archive-access/?rev=1994&view=rev Author: bradtofel Date: 2007-09-18 16:43:14 -0700 (Tue, 18 Sep 2007) Log Message: ----------- BUGFIX: now remote query type is changed explicitly to urlclosestquery, also construct XML factory in class initializer. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-09-18 23:39:45 UTC (rev 1993) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-09-18 23:43:14 UTC (rev 1994) @@ -71,7 +71,7 @@ private String searchUrlBase; - private DocumentBuilderFactory factory; + private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); private static final String WB_XML_REQUEST_TAGNAME = "request"; @@ -109,7 +109,6 @@ public void init() throws ConfigurationException { LOGGER.info("initializing RemoteCDXIndex..."); - this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(false); LOGGER.info("Using base search url " + this.searchUrlBase); } @@ -252,7 +251,12 @@ protected String getRequestUrl(WaybackRequest wbRequest) throws BadQueryException { - return this.searchUrlBase + "?" + wbRequest.getQueryArguments(); + WaybackRequest tmp = wbRequest.clone(); + String type = tmp.get(WaybackConstants.REQUEST_TYPE); + if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY)) { + tmp.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_URL_QUERY); + } + return this.searchUrlBase + "?" + tmp.getQueryArguments(); } // extract the text content of a single tag under a node This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2016 http://archive-access.svn.sourceforge.net/archive-access/?rev=2016&view=rev Author: bradtofel Date: 2007-09-28 15:38:08 -0700 (Fri, 28 Sep 2007) Log Message: ----------- TWEAK: removed unused static property name. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-09-28 18:44:25 UTC (rev 2015) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-09-28 22:38:08 UTC (rev 2016) @@ -64,11 +64,6 @@ private static final Logger LOGGER = Logger.getLogger(RemoteResourceIndex .class.getName()); - /** - * name of the property value indicating the url prefix of the remote index. - */ - public final static String SEARCH_BASE_URL = "resourceindex.baseurl"; - private String searchUrlBase; private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2076 http://archive-access.svn.sourceforge.net/archive-access/?rev=2076&view=rev Author: bradtofel Date: 2007-11-05 19:52:49 -0800 (Mon, 05 Nov 2007) Log Message: ----------- BUGFIX: (unreported) now uses addSearchResult() so CaptureSearchResults can properly track first and last date present. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-11-06 03:51:43 UTC (rev 2075) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-11-06 03:52:49 UTC (rev 2076) @@ -203,7 +203,7 @@ for(int i = 0; i < xresults.getLength(); i++) { Node xresult = xresults.item(i); SearchResult result = searchElementToSearchResult(xresult); - results.addSearchResultRaw(result,true); + results.addSearchResult(result,true); } return results; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2244 http://archive-access.svn.sourceforge.net/archive-access/?rev=2244&view=rev Author: bradtofel Date: 2008-04-15 17:40:18 -0700 (Tue, 15 Apr 2008) Log Message: ----------- BUGFIX: ACC-15: RemoteResourceIndex was not using SelfRedirectFilter for replay requests, which caused redirect loops. Patch submitted by Alex Osborne. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-04-16 00:37:46 UTC (rev 2243) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-04-16 00:40:18 UTC (rev 2244) @@ -33,6 +33,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.archive.wayback.ResourceIndex; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; @@ -44,6 +45,10 @@ import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.resourceindex.filters.SelfRedirectFilter; +import org.archive.wayback.util.ObjectFilter; +import org.archive.wayback.util.ObjectFilterChain; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; @@ -75,6 +80,7 @@ private static final String WB_XML_ERROR_TAGNAME = "error"; private static final String WB_XML_ERROR_TITLE = "title"; private static final String WB_XML_ERROR_MESSAGE = "message"; + private UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); @SuppressWarnings("unchecked") private final ThreadLocal tl = new ThreadLocal() { @@ -117,13 +123,15 @@ ResourceNotInArchiveException, BadQueryException, AccessControlException { - return urlToSearchResults(getRequestUrl(wbRequest)); + return urlToSearchResults(getRequestUrl(wbRequest), + getSearchResultFilters(wbRequest)); } - protected SearchResults urlToSearchResults(String requestUrl) - throws ResourceIndexNotAvailableException, - ResourceNotInArchiveException, BadQueryException, - AccessControlException { + protected SearchResults urlToSearchResults(String requestUrl, + ObjectFilter<SearchResult> filter) + throws ResourceIndexNotAvailableException, + ResourceNotInArchiveException, BadQueryException, + AccessControlException { Document document = null; try { @@ -141,7 +149,7 @@ } checkDocumentForExceptions(document); - return documentToSearchResults(document); + return documentToSearchResults(document, filter); } protected void checkDocumentForExceptions(Document document) @@ -182,7 +190,27 @@ } } - protected SearchResults documentToSearchResults(Document document) { + protected ObjectFilter<SearchResult> getSearchResultFilters( + WaybackRequest wbRequest) { + String searchType = wbRequest.get(WaybackConstants.REQUEST_TYPE); + ObjectFilterChain<SearchResult> filters = + new ObjectFilterChain<SearchResult>(); + + if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY) + || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { + + SelfRedirectFilter selfRedirectFilter = new SelfRedirectFilter(); + selfRedirectFilter.setCanonicalizer(canonicalizer); + filters.addFilter(selfRedirectFilter); + } else { + // no filters for now + filters = null; + } + return filters; + } + + protected SearchResults documentToSearchResults(Document document, + ObjectFilter<SearchResult> filter) { SearchResults results = null; NodeList filters = getRequestFilters(document); String resultsType = getResultsType(document); @@ -203,7 +231,17 @@ for(int i = 0; i < xresults.getLength(); i++) { Node xresult = xresults.item(i); SearchResult result = searchElementToSearchResult(xresult); - results.addSearchResult(result,true); + + int ruling = ObjectFilter.FILTER_INCLUDE; + if (filter != null) { + ruling = filter.filterObject(result); + } + + if (ruling == ObjectFilter.FILTER_ABORT) { + break; + } else if (ruling == ObjectFilter.FILTER_INCLUDE) { + results.addSearchResult(result, true); + } } return results; } @@ -291,4 +329,12 @@ public void shutdown() throws IOException { // No-op } + + public UrlCanonicalizer getCanonicalizer() { + return canonicalizer; + } + + public void setCanonicalizer(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |