From: <bra...@us...> - 2008-07-01 23:48:38
|
Revision: 2377 http://archive-access.svn.sourceforge.net/archive-access/?rev=2377&view=rev Author: bradtofel Date: 2008-07-01 16:48:47 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult required non-trivial changes, but that was the only tangible result. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2008-07-01 23:47:50 UTC (rev 2376) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) @@ -36,8 +36,8 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; @@ -77,7 +77,7 @@ private static final String NUTCH_DIGEST = "digest"; private static final String NUTCH_PRIMARY_TYPE = "primaryType"; private static final String NUTCH_SUB_TYPE = "subType"; - private static final String NUTCH_CAPTURE_HOST = "site"; +// private static final String NUTCH_CAPTURE_HOST = "site"; private static final String NUTCH_CAPTURE_URL = "link"; private static final String NUTCH_SEARCH_RESULT_TAG = "item"; @@ -129,7 +129,7 @@ e.getMessage()); } - SearchResults results; + CaptureSearchResults results; String type = wbRequest.get(WaybackConstants.REQUEST_TYPE); if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || type.equals(WaybackConstants.REQUEST_URL_QUERY)) { @@ -157,21 +157,21 @@ Element e = (Element) nodes.item(i); - SearchResult result = elementToSearchResult(e); + CaptureSearchResult result = elementToSearchResult(e); results.addSearchResult(result); } Element channelElement = (Element) channel.item(0); - results.putFilter(WaybackConstants.RESULTS_FIRST_RETURNED, + results.putFilter(SearchResults.RESULTS_FIRST_RETURNED, getNodeContent(channelElement,NUTCH_FIRST_RESULT)); - results.putFilter(WaybackConstants.RESULTS_NUM_RESULTS, + results.putFilter(SearchResults.RESULTS_NUM_RESULTS, getNodeContent(channelElement,NUTCH_NUM_RESULTS)); - results.putFilter(WaybackConstants.RESULTS_NUM_RETURNED, + results.putFilter(SearchResults.RESULTS_NUM_RETURNED, getNodeContent(channelElement,NUTCH_NUM_RETURNED)); - results.putFilter(WaybackConstants.RESULTS_REQUESTED, + results.putFilter(SearchResults.RESULTS_REQUESTED, String.valueOf(wbRequest.getResultsPerPage())); results.putFilter(WaybackConstants.REQUEST_START_DATE, @@ -182,13 +182,12 @@ return results; } - private SearchResult elementToSearchResult(Element e) + private CaptureSearchResult elementToSearchResult(Element e) throws ResourceIndexNotAvailableException { - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); - result.put(WaybackConstants.RESULT_ARC_FILE, - getNodeNutchContent(e,NUTCH_ARCNAME)); + result.setFile(getNodeNutchContent(e,NUTCH_ARCNAME)); // The date in nutchwax is now named 'tstamp' and its // 17 characters rather than 14. Pass first 14 only. @@ -202,27 +201,21 @@ if (d.length() == 17) { d = d.substring(0, 14); } - result.put(WaybackConstants.RESULT_CAPTURE_DATE, d); + result.setCaptureTimestamp(d); //result.put(WaybackConstants.RESULT_HTTP_CODE,getNodeContent(e,"")); - result.put(WaybackConstants.RESULT_HTTP_CODE,NUTCH_DEFAULT_HTTP_CODE); - result.put(WaybackConstants.RESULT_MD5_DIGEST, - getNodeNutchContent(e,NUTCH_DIGEST)); + result.setHttpCode(NUTCH_DEFAULT_HTTP_CODE); + result.setDigest(getNodeNutchContent(e,NUTCH_DIGEST)); - result.put(WaybackConstants.RESULT_MIME_TYPE, - getNodeNutchContent(e,NUTCH_PRIMARY_TYPE) + "/" + + result.setMimeType(getNodeNutchContent(e,NUTCH_PRIMARY_TYPE) + "/" + getNodeNutchContent(e,NUTCH_SUB_TYPE)); - result.put(WaybackConstants.RESULT_OFFSET, - getNodeNutchContent(e,NUTCH_ARCOFFSET)); + result.setOffset(Long.parseLong(getNodeNutchContent(e,NUTCH_ARCOFFSET))); - result.put(WaybackConstants.RESULT_ORIG_HOST, - getNodeNutchContent(e,NUTCH_CAPTURE_HOST)); -// result.put(WaybackConstants.RESULT_REDIRECT_URL,getNodeContent(e,"")); - result.put(WaybackConstants.RESULT_REDIRECT_URL, - NUTCH_DEFAULT_REDIRECT_URL); - result.put(WaybackConstants.RESULT_URL,getNodeContent(e, - NUTCH_CAPTURE_URL)); + result.setRedirectUrl(NUTCH_DEFAULT_REDIRECT_URL); + result.setCaptureTimestamp(getNodeContent(e,NUTCH_CAPTURE_URL)); + result.setOriginalUrl(getNodeContent(e,NUTCH_CAPTURE_URL)); + result.setUrlKey(getNodeContent(e,NUTCH_CAPTURE_URL)); return result; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-07-01 23:47:50 UTC (rev 2376) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) @@ -35,9 +35,11 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UrlSearchResult; import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; @@ -122,13 +124,13 @@ throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { - +// throw new ResourceIndexNotAvailableException("oops"); return urlToSearchResults(getRequestUrl(wbRequest), getSearchResultFilters(wbRequest)); } protected SearchResults urlToSearchResults(String requestUrl, - ObjectFilter<SearchResult> filter) + ObjectFilter<CaptureSearchResult> filter) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { @@ -190,11 +192,11 @@ } } - protected ObjectFilter<SearchResult> getSearchResultFilters( + protected ObjectFilter<CaptureSearchResult> getSearchResultFilters( WaybackRequest wbRequest) { String searchType = wbRequest.get(WaybackConstants.REQUEST_TYPE); - ObjectFilterChain<SearchResult> filters = - new ObjectFilterChain<SearchResult>(); + ObjectFilterChain<CaptureSearchResult> filters = + new ObjectFilterChain<CaptureSearchResult>(); if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { @@ -210,14 +212,14 @@ } protected SearchResults documentToSearchResults(Document document, - ObjectFilter<SearchResult> filter) { + ObjectFilter<CaptureSearchResult> filter) { SearchResults results = null; NodeList filters = getRequestFilters(document); String resultsType = getResultsType(document); if(resultsType.equals(WaybackConstants.RESULTS_TYPE_CAPTURE)) { - results = new CaptureSearchResults(); + results = documentToCaptureSearchResults(document,filter); } else { - results = new UrlSearchResults(); + results = documentToUrlSearchResults(document); } for(int i = 0; i < filters.getLength(); i++) { String key = filters.item(i).getNodeName(); @@ -226,11 +228,26 @@ results.putFilter(key,value); } } - + return results; + } + private UrlSearchResults documentToUrlSearchResults( + Document document) { + UrlSearchResults results = new UrlSearchResults(); NodeList xresults = getSearchResults(document); for(int i = 0; i < xresults.getLength(); i++) { Node xresult = xresults.item(i); - SearchResult result = searchElementToSearchResult(xresult); + UrlSearchResult result = searchElementToUrlSearchResult(xresult); + results.addSearchResult(result, true); + } + return results; + } + private CaptureSearchResults documentToCaptureSearchResults( + Document document, ObjectFilter<CaptureSearchResult> filter) { + CaptureSearchResults results = new CaptureSearchResults(); + NodeList xresults = getSearchResults(document); + for(int i = 0; i < xresults.getLength(); i++) { + Node xresult = xresults.item(i); + CaptureSearchResult result = searchElementToCaptureSearchResult(xresult); int ruling = ObjectFilter.FILTER_INCLUDE; if (filter != null) { @@ -245,11 +262,21 @@ } return results; } + private UrlSearchResult searchElementToUrlSearchResult(Node e) { - private SearchResult searchElementToSearchResult(Node e) { + UrlSearchResult result = new UrlSearchResult(); + addNodeDataToSearchResult(e,result); + return result; + } + private CaptureSearchResult searchElementToCaptureSearchResult(Node e) { - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); + addNodeDataToSearchResult(e,result); + return result; + } + private void addNodeDataToSearchResult(Node e, SearchResult result) { + NodeList chitlens = e.getChildNodes(); for(int i = 0; i < chitlens.getLength(); i++) { String key = chitlens.item(i).getNodeName(); @@ -258,7 +285,6 @@ result.put(key,value); } } - return result; } protected NodeList getRequestFilters(Document d) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |