From: <bra...@us...> - 2007-07-25 00:47:06
|
Revision: 1883 http://archive-access.svn.sourceforge.net/archive-access/?rev=1883&view=rev Author: bradtofel Date: 2007-07-24 17:47:07 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: now using ObjectFilter with strong typing, instead of inheriting from SearchResultFilter Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java 2007-07-25 00:45:21 UTC (rev 1882) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeExclusionFilterFactory.java 2007-07-25 00:47:07 UTC (rev 1883) @@ -26,10 +26,10 @@ import java.util.ArrayList; import java.util.Iterator; -import java.util.Properties; -import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.core.SearchResult; import org.archive.wayback.resourceindex.filters.CompositeExclusionFilter; +import org.archive.wayback.util.ObjectFilter; /** * Class that provides SearchResult Filtering based on multiple @@ -43,14 +43,6 @@ private ArrayList<ExclusionFilterFactory> factories = new ArrayList<ExclusionFilterFactory>(); - - /* (non-Javadoc) - * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) - */ - public void init(Properties p) throws ConfigurationException { - // no-op... 
- } - /** * @param factory to be added to the composite @@ -62,7 +54,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.ExclusionFilterFactory#get() */ - public SearchResultFilter get() { + public ObjectFilter<SearchResult> get() { Iterator<ExclusionFilterFactory> itr = factories.iterator(); CompositeExclusionFilter filter = new CompositeExclusionFilter(); while(itr.hasNext()) { @@ -70,4 +62,20 @@ } return filter; } + + + /** + * @return the factories + */ + public ArrayList<ExclusionFilterFactory> getFactories() { + return factories; + } + + + /** + * @param factories the factories to set + */ + public void setFactories(ArrayList<ExclusionFilterFactory> factories) { + this.factories = factories; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java 2007-07-25 00:45:21 UTC (rev 1882) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ExclusionFilterFactory.java 2007-07-25 00:47:07 UTC (rev 1883) @@ -24,18 +24,18 @@ */ package org.archive.wayback.resourceindex; -import org.archive.wayback.PropertyConfigurable; - +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.ObjectFilter; /** * * * @author brad * @version $Date$, $Revision$ */ -public interface ExclusionFilterFactory extends PropertyConfigurable { +public interface ExclusionFilterFactory { /** - * @return a SearchResultFilter object that filters records based on + * @return an ObjectFilter object that filters records based on * some set of exclusion rules. 
*/ - public SearchResultFilter get(); + public ObjectFilter<SearchResult> get(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:48:54
|
Revision: 1885 http://archive-access.svn.sourceforge.net/archive-access/?rev=1885&view=rev Author: bradtofel Date: 2007-07-24 17:48:56 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: removed all references to PropertyConfigurable interface Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-07-25 00:47:50 UTC (rev 1884) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2007-07-25 00:48:56 UTC (rev 1885) @@ -28,7 +28,6 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.util.Properties; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; @@ -37,7 +36,6 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; @@ -63,9 +61,6 @@ private static final Logger LOGGER = Logger.getLogger(NutchResourceIndex.class.getName()); - - private final static String SEARCH_BASE_URL = "resourceindex.baseurl"; - private final static int MAX_RECORDS = 1000; private int maxRecords = MAX_RECORDS; @@ -93,17 +88,12 @@ private static final String NUTCH_DEFAULT_HTTP_CODE = "200"; private static final String NUTCH_DEFAULT_REDIRECT_URL = "-"; - - /* (non-Javadoc) - * @see 
org.archive.wayback.PropertyConfigurable#init(java.util.Properties) + /** + * @throws ConfigurationException */ - public void init(Properties p) throws ConfigurationException { + public void init() throws ConfigurationException { LOGGER.info("initializing NutchResourceIndex..."); - PropertyConfiguration pc = new PropertyConfiguration(p); - searchUrlBase = pc.getString(SEARCH_BASE_URL); LOGGER.info("Using base search url " + this.searchUrlBase); - maxRecords = pc.getInt(WaybackConstants.MAX_RESULTS_CONFIG_NAME, - MAX_RECORDS); this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(true); @@ -114,11 +104,7 @@ e.printStackTrace(); throw new ConfigurationException(e.getMessage()); } - if (!this.builder.isNamespaceAware()) { - LOGGER.severe("Builder is not namespace aware."); - } } - /* (non-Javadoc) * @see org.archive.wayback.ResourceIndex#query(org.archive.wayback.core.WaybackRequest) */ @@ -346,4 +332,28 @@ d = this.builder.parse(url); return d; } +/** + * @return the searchUrlBase + */ +public String getSearchUrlBase() { + return searchUrlBase; } +/** + * @param searchUrlBase the searchUrlBase to set + */ +public void setSearchUrlBase(String searchUrlBase) { + this.searchUrlBase = searchUrlBase; +} +/** + * @return the maxRecords + */ +public int getMaxRecords() { + return maxRecords; +} +/** + * @param maxRecords the maxRecords to set + */ +public void setMaxRecords(int maxRecords) { + this.maxRecords = maxRecords; +} +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-07-25 00:47:50 UTC (rev 1884) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2007-07-25 00:48:56 UTC (rev 
1885) @@ -26,7 +26,6 @@ import java.io.File; import java.io.IOException; -import java.util.Properties; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; @@ -36,7 +35,6 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.UrlSearchResults; @@ -83,6 +81,7 @@ private static final String WB_XML_ERROR_TITLE = "title"; private static final String WB_XML_ERROR_MESSAGE = "message"; + @SuppressWarnings("unchecked") private final ThreadLocal tl = new ThreadLocal() { protected synchronized Object initialValue() { DocumentBuilder builder = null; @@ -104,21 +103,16 @@ return (DocumentBuilder) tl.get(); } - /* - * (non-Javadoc) - * - * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) - */ - public void init(Properties p) throws ConfigurationException { + /** + * @throws ConfigurationException + */ + public void init() throws ConfigurationException { LOGGER.info("initializing RemoteCDXIndex..."); - PropertyConfiguration pc = new PropertyConfiguration(p); - searchUrlBase = pc.getString(SEARCH_BASE_URL); this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(false); - LOGGER.info("Using base search url " + this.searchUrlBase); + LOGGER.info("Using base search url " + this.searchUrlBase); } - /* * (non-Javadoc) * @@ -280,4 +274,18 @@ throws IOException, SAXException { return (getDocumentBuilder()).parse(f); } + + /** + * @return the searchUrlBase + */ + public String getSearchUrlBase() { + return searchUrlBase; + } + + /** + * @param searchUrlBase the searchUrlBase to set + */ + public void setSearchUrlBase(String searchUrlBase) { + this.searchUrlBase = searchUrlBase; + } } \ No newline at end of file This was sent by the SourceForge.net 
collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:49:33
|
Revision: 1886 http://archive-access.svn.sourceforge.net/archive-access/?rev=1886&view=rev Author: bradtofel Date: 2007-07-24 17:49:36 -0700 (Tue, 24 Jul 2007) Log Message: ----------- TWEAK: type safety Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2007-07-25 00:48:56 UTC (rev 1885) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2007-07-25 00:49:36 UTC (rev 1886) @@ -29,6 +29,7 @@ import java.util.Comparator; import java.util.List; +import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.cdx.CDXIndex; import org.archive.wayback.util.CloseableIterator; @@ -66,11 +67,12 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator getPrefixIterator(String prefix) + public CloseableIterator<SearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { - Comparator<Object> comparator = new SearchResultComparator(); - CompositeSortedIterator itr = new 
CompositeSortedIterator(comparator); + Comparator<SearchResult> comparator = new SearchResultComparator(); + CompositeSortedIterator<SearchResult> itr = + new CompositeSortedIterator<SearchResult>(comparator); for (int i = 0; i < sources.size(); i++) { itr.addComponent(sources.get(i).getPrefixIterator(prefix)); } @@ -82,11 +84,12 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator getPrefixReverseIterator(String prefix) - throws ResourceIndexNotAvailableException { + public CloseableIterator<SearchResult> getPrefixReverseIterator( + String prefix) throws ResourceIndexNotAvailableException { - Comparator<Object> comparator = new SearchResultComparator(true); - CompositeSortedIterator itr = new CompositeSortedIterator(comparator); + Comparator<SearchResult> comparator = new SearchResultComparator(true); + CompositeSortedIterator<SearchResult> itr = + new CompositeSortedIterator<SearchResult>(comparator); for (int i = 0; i < sources.size(); i++) { itr.addComponent(sources.get(i).getPrefixReverseIterator(prefix)); } @@ -96,7 +99,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator c) throws IOException{ + public void cleanup(CloseableIterator<SearchResult> c) throws IOException{ c.close(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-07-25 00:48:56 UTC (rev 1885) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-07-25 00:49:36 UTC (rev 1886) @@ -25,7 +25,6 @@ package 
org.archive.wayback.resourceindex; import java.io.IOException; -import java.util.Properties; import org.apache.commons.httpclient.URIException; import org.archive.net.UURI; @@ -45,7 +44,6 @@ import org.archive.wayback.resourceindex.filters.WindowEndFilter; import org.archive.wayback.resourceindex.filters.WindowStartFilter; import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.PropertyConfiguration; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; @@ -53,7 +51,6 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.util.CloseableIterator; @@ -82,23 +79,9 @@ private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - /* - * (non-Javadoc) - * - * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) - */ - public void init(Properties p) throws ConfigurationException { - PropertyConfiguration pc = new PropertyConfiguration(p); - source = SearchResultSourceFactory.get(p); - exclusionFactory = ExclusionFilterFactoryFactory.get(p); - - maxRecords = pc.getInt(WaybackConstants.MAX_RESULTS_CONFIG_NAME, - MAX_RECORDS); - } - - private SearchResultFilter getExclusionFilter() + private ObjectFilter<SearchResult> getExclusionFilter() throws ResourceIndexNotAvailableException { - SearchResultFilter filter = null; + ObjectFilter<SearchResult> filter = null; if(exclusionFactory != null) { filter = exclusionFactory.get(); if(filter == null) { @@ -109,11 +92,12 @@ return filter; } - private void filterRecords(CloseableIterator itr, ObjectFilter filter, - SearchResults results, boolean forwards) throws 
IOException { + private void filterRecords(CloseableIterator<SearchResult> itr, + ObjectFilter<SearchResult> filter, SearchResults results, + boolean forwards) throws IOException { while (itr.hasNext()) { - SearchResult result = (SearchResult) itr.next(); + SearchResult result = itr.next(); int ruling = filter.filterObject(result); if (ruling == ObjectFilter.FILTER_ABORT) { break; @@ -225,7 +209,7 @@ GuardRailFilter guardrail = new GuardRailFilter(maxRecords); // checks an exclusion service for every matching record - SearchResultFilter exclusion = getExclusionFilter(); + ObjectFilter<SearchResult> exclusion = getExclusionFilter(); // count how many results got to the ExclusionFilter: CounterFilter preExCounter = new CounterFilter(); @@ -240,8 +224,10 @@ || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { results = new CaptureSearchResults(); - ObjectFilterChain forwardFilters = new ObjectFilterChain(); - ObjectFilterChain reverseFilters = new ObjectFilterChain(); + ObjectFilterChain<SearchResult> forwardFilters = + new ObjectFilterChain<SearchResult>(); + ObjectFilterChain<SearchResult> reverseFilters = + new ObjectFilterChain<SearchResult>(); // use the same guardrail for both: forwardFilters.addFilter(guardrail); @@ -320,7 +306,8 @@ results = new CaptureSearchResults(); // build up the FilterChain(s): - ObjectFilterChain filters = new ObjectFilterChain(); + ObjectFilterChain<SearchResult> filters = + new ObjectFilterChain<SearchResult>(); filters.addFilter(guardrail); filters.addFilter(new UrlMatchFilter(keyUrl)); @@ -354,7 +341,8 @@ results = new UrlSearchResults(); // build up the FilterChain(s): - ObjectFilterChain filters = new ObjectFilterChain(); + ObjectFilterChain<SearchResult> filters = + new ObjectFilterChain<SearchResult>(); filters.addFilter(guardrail); filters.addFilter(new UrlPrefixMatchFilter(keyUrl)); Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java 2007-07-25 00:48:56 UTC (rev 1885) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java 2007-07-25 00:49:36 UTC (rev 1886) @@ -35,7 +35,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class SearchResultComparator implements Comparator<Object> { +public class SearchResultComparator implements Comparator<SearchResult> { private boolean backwards; /** @@ -52,11 +52,7 @@ backwards = false; } - private String objectToKey(Object o) { - if(!(o instanceof SearchResult)) { - throw new IllegalArgumentException("Need SearchResult arguments"); - } - SearchResult r = (SearchResult) o; + private String objectToKey(SearchResult r) { String urlKey = r.get(WaybackConstants.RESULT_URL_KEY); String captureDate = r.get(WaybackConstants.RESULT_CAPTURE_DATE); return urlKey + " " + captureDate; @@ -64,7 +60,7 @@ /* (non-Javadoc) * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ - public int compare(Object o1, Object o2) { + public int compare(SearchResult o1, SearchResult o2) { String k1 = objectToKey(o1); String k2 = objectToKey(o2); if(backwards) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2007-07-25 00:48:56 UTC (rev 1885) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2007-07-25 00:49:36 UTC 
(rev 1886) @@ -26,6 +26,7 @@ import java.io.IOException; +import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.util.CloseableIterator; @@ -43,7 +44,7 @@ * results. * @throws ResourceIndexNotAvailableException */ - public CloseableIterator getPrefixIterator(final String prefix) + public CloseableIterator<SearchResult> getPrefixIterator(final String prefix) throws ResourceIndexNotAvailableException; /** @@ -53,12 +54,12 @@ * results. * @throws ResourceIndexNotAvailableException */ - public CloseableIterator getPrefixReverseIterator(final String prefix) + public CloseableIterator<SearchResult> getPrefixReverseIterator(final String prefix) throws ResourceIndexNotAvailableException; /** * @param c * @throws IOException */ - public void cleanup(CloseableIterator c) throws IOException; + public void cleanup(CloseableIterator<SearchResult> c) throws IOException; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-25 00:51:36
|
Revision: 1889 http://archive-access.svn.sourceforge.net/archive-access/?rev=1889&view=rev Author: bradtofel Date: 2007-07-24 17:51:38 -0700 (Tue, 24 Jul 2007) Log Message: ----------- TWEAK: type safety Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/ArcIndexer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/SearchResultToBDBRecordAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -32,6 +32,7 @@ import java.util.Iterator; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.bdb.BDBRecordSet; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; @@ -66,8 +67,9 @@ } } - private CloseableIterator adaptIterator(Iterator itr) { 
- return new AdaptedIterator(itr,new BDBRecordToSearchResultAdapter()); + private CloseableIterator<SearchResult> adaptIterator( + Iterator<BDBRecord> itr) { + return new AdaptedIterator<BDBRecord,SearchResult>(itr,new BDBRecordToSearchResultAdapter()); } /* @@ -75,7 +77,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator getPrefixIterator(String prefix) + public CloseableIterator<SearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { try { @@ -90,7 +92,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator getPrefixReverseIterator(String prefix) + public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(recordIterator(prefix,false)); @@ -102,7 +104,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator c) throws IOException { + public void cleanup(CloseableIterator<SearchResult> c) throws IOException { c.close(); } @@ -146,7 +148,7 @@ CaptureSearchResults results = new CaptureSearchResults(); if(args.length == 4) { String prefix = args[3]; - CloseableIterator itr = null; + CloseableIterator<SearchResult> itr = null; try { itr = index.getPrefixIterator(prefix); } catch (ResourceIndexNotAvailableException e) { @@ -179,7 +181,7 @@ } } } else { - CloseableIterator itr = null; + CloseableIterator<SearchResult> itr = null; try { itr = index.getPrefixIterator(" "); } catch (ResourceIndexNotAvailableException e) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -29,6 +29,7 @@ import java.util.Iterator; import java.util.logging.Logger; +import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.resourceindex.indexer.ArcIndexer; @@ -146,7 +147,7 @@ private boolean mergeFile(File cdxFile) { boolean added = false; try { - Iterator it = indexer.getCDXFileBDBRecordIterator(cdxFile); + Iterator<BDBRecord> it = indexer.getCDXFileBDBRecordIterator(cdxFile); index.insertRecords(it); added = true; } catch (IOException e) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -27,7 +27,9 @@ import java.io.UnsupportedEncodingException; import org.archive.wayback.bdb.BDBRecord; +import org.archive.wayback.core.SearchResult; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; +import org.archive.wayback.util.Adapter; /** * Adapter that converts a BDBRecord into a SearchResult @@ -36,7 +38,7 @@ * @version $Date$, $Revision$ */ public class BDBRecordToSearchResultAdapter - extends CDXLineToSearchResultAdapter { + implements Adapter<BDBRecord,SearchResult> { private static int DEFAULT_SB_SIZE = 100; private 
StringBuilder sb; @@ -47,14 +49,11 @@ sb = new StringBuilder(DEFAULT_SB_SIZE); } - /* (non-Javadoc) - * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + /** + * @param record + * @return SearchResult representation of input BDBRecord */ - public Object adapt(Object o) { - if(!(o instanceof BDBRecord)) { - throw new IllegalArgumentException("Argument is not a BDBRecord"); - } - BDBRecord record = (BDBRecord) o; + public SearchResult adapt(BDBRecord record) { sb.setLength(0); try { String key = new String(record.getKey().getData(),"UTF-8"); @@ -68,6 +67,6 @@ // should not happen with UTF-8 hard-coded.. e.printStackTrace(); } - return super.adapt(sb.toString()); + return CDXLineToSearchResultAdapter.doAdapt(sb.toString()); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -50,14 +50,15 @@ */ private static final long serialVersionUID = 1L; - private CloseableIterator adaptIterator(Iterator itr) { - return new AdaptedIterator(itr,new CDXLineToSearchResultAdapter()); + private CloseableIterator<SearchResult> adaptIterator(Iterator<String> itr) { + return new AdaptedIterator<String,SearchResult>(itr, + new CDXLineToSearchResultAdapter()); } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator getPrefixIterator(String prefix) + public CloseableIterator<SearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(getRecordIterator(prefix)); @@ -69,7 +70,7 @@ 
/* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator getPrefixReverseIterator(String prefix) + public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(getReverseRecordIterator(prefix)); @@ -84,7 +85,7 @@ * @return Iterator of SearchResults of records starting with prefix * @throws IOException */ - public Iterator getUrlIterator(final String prefix) throws IOException { + public Iterator<SearchResult> getUrlIterator(final String prefix) throws IOException { return adaptIterator(getRecordIterator(prefix)); } @@ -94,19 +95,20 @@ * @return Iterator of results in closest order to wantTS * @throws IOException */ - public Iterator getClosestIterator(final String prefix, + public Iterator<SearchResult> getClosestIterator(final String prefix, final Timestamp wantTS) throws IOException { - Iterator forwardItr = adaptIterator(getRecordIterator(prefix)); - Iterator reverseItr = adaptIterator(getReverseRecordIterator(prefix)); - TimestampComparator comparator = new TimestampComparator(wantTS); - CompositeSortedIterator itr = new CompositeSortedIterator(comparator); + Iterator<SearchResult> forwardItr = adaptIterator(getRecordIterator(prefix)); + Iterator<SearchResult> reverseItr = adaptIterator(getReverseRecordIterator(prefix)); + Comparator<SearchResult> comparator = new TimestampComparator(wantTS); + CompositeSortedIterator<SearchResult> itr = + new CompositeSortedIterator<SearchResult>(comparator); itr.addComponent(forwardItr); itr.addComponent(reverseItr); return itr; } - private class TimestampComparator implements Comparator<Object> { + private class TimestampComparator implements Comparator<SearchResult> { private int wantedSSE; /** * @param wanted @@ -114,11 +116,7 @@ public TimestampComparator(Timestamp wanted) { wantedSSE = wanted.sse(); } - private int 
searchResultToDistance(Object o) { - if(!(o instanceof SearchResult)) { - throw new IllegalArgumentException("Need SearchResult objects"); - } - SearchResult sr = (SearchResult) o; + private int searchResultToDistance(SearchResult sr) { String dateStr = sr.get(WaybackConstants.RESULT_CAPTURE_DATE); Timestamp ts = new Timestamp(dateStr); return Math.abs(wantedSSE - ts.sse()); @@ -126,7 +124,7 @@ /* (non-Javadoc) * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ - public int compare(Object o1, Object o2) { + public int compare(SearchResult o1, SearchResult o2) { int d1 = searchResultToDistance(o1); int d2 = searchResultToDistance(o2); if(d1 < d2) { @@ -141,7 +139,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator c) throws IOException { + public void cleanup(CloseableIterator<SearchResult> c) throws IOException { c.close(); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -38,14 +38,17 @@ * @author brad * @version $Date$, $Revision$ */ -public class CDXLineToSearchResultAdapter implements Adapter { +public class CDXLineToSearchResultAdapter implements Adapter<String,SearchResult> { - public Object adapt(Object o) { + public SearchResult adapt(String line) { + return doAdapt(line); + } + /** + * @param line + * @return SearchResult representation of input line + */ + public static SearchResult 
doAdapt(String line) { SearchResult result = new SearchResult(); - if(!(o instanceof String)) { - throw new IllegalArgumentException("Argument is not a String"); - } - String line = (String) o; String[] tokens = line.split(" "); if (tokens.length != 9) { return null; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/ArcIndexer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/ArcIndexer.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/ArcIndexer.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -43,6 +43,7 @@ import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; @@ -370,12 +371,13 @@ * cdxFile argument * @throws IOException */ - public Iterator getCDXFileBDBRecordIterator(File cdxFile) throws IOException { + public Iterator<BDBRecord> getCDXFileBDBRecordIterator(File cdxFile) throws IOException { FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); - AdaptedIterator searchResultItr = new AdaptedIterator( - ffile.getSequentialIterator(), + AdaptedIterator<String,SearchResult> searchResultItr = + new AdaptedIterator<String,SearchResult>( + ffile.getSequentialIterator(), new CDXLineToSearchResultAdapter()); - return new AdaptedIterator(searchResultItr, + return new AdaptedIterator<SearchResult,BDBRecord>(searchResultItr, new SearchResultToBDBRecordAdapter()); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/SearchResultToBDBRecordAdapter.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/SearchResultToBDBRecordAdapter.java 2007-07-25 00:50:55 UTC (rev 1888) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/SearchResultToBDBRecordAdapter.java 2007-07-25 00:51:38 UTC (rev 1889) @@ -37,7 +37,8 @@ * @author brad * @version $Date$, $Revision$ */ -public class SearchResultToBDBRecordAdapter implements Adapter { +public class SearchResultToBDBRecordAdapter implements + Adapter<SearchResult,BDBRecord> { DatabaseEntry key = new DatabaseEntry(); @@ -50,12 +51,7 @@ * * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public Object adapt(Object o) { - if (!(o instanceof SearchResult)) { - throw new IllegalArgumentException( - "Argument is not a SearchResult"); - } - SearchResult result = (SearchResult) o; + public BDBRecord adapt(SearchResult result) { key.setData(BDBRecordSet.stringToBytes(ArcIndexer .searchResultToString(result, ArcIndexer.TYPE_CDX_KEY))); value.setData(BDBRecordSet.stringToBytes(ArcIndexer This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-11-28 00:41:32
|
Revision: 2078 http://archive-access.svn.sourceforge.net/archive-access/?rev=2078&view=rev Author: bradtofel Date: 2007-11-27 16:41:32 -0800 (Tue, 27 Nov 2007) Log Message: ----------- FEATURE: LocalResourceIndex now has option to annotate dedupe SearchResult records with information from previous captured copies. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java 2007-11-28 00:41:32 UTC (rev 2078) @@ -0,0 +1,64 @@ +package org.archive.wayback.resourceindex; + +import java.util.HashMap; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.Adapter; + +/** + * Adapter class that observes a stream of SearchResults tracking the last seen: + * Arc/Warc Filename + * Arc/Warc offset + * HTTP Response + * MIME-Type + * Redirect URL + * + * for complete SearchResults. If subsequent SearchResults are missing these + * fields ("-") and the Digest field is the same, then the subsequent + * SearchResults are updated with the values from the kept copy, and an + * additional annotation field is added. 
+ * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DeduplicationSearchResultAnnotationAdapter +implements Adapter<SearchResult,SearchResult> { + private final static String EMPTY_VALUE = "-"; + private final static String FIELDS[] = { + WaybackConstants.RESULT_ARC_FILE, + WaybackConstants.RESULT_OFFSET, + WaybackConstants.RESULT_HTTP_CODE, + WaybackConstants.RESULT_MIME_TYPE, + WaybackConstants.RESULT_REDIRECT_URL + }; + private String lastDigest = null; + private HashMap<String,String> lastValues = new HashMap<String,String>(); + private SearchResult annotate(SearchResult o) { + String thisDigest = o.get(WaybackConstants.RESULT_MD5_DIGEST); + if(!thisDigest.equals(lastDigest)) { + return null; + } + for(String field : FIELDS) { + o.put(field, lastValues.get(field)); + } + o.put(WaybackConstants.RESULT_DUPLICATE_ANNOTATION, + WaybackConstants.RESULT_DUPLICATE_DIGEST); + return o; + } + private SearchResult remember(SearchResult o) { + lastDigest = o.get(WaybackConstants.RESULT_MD5_DIGEST); + for(String field : FIELDS) { + lastValues.put(field, o.get(field)); + } + return o; + } + public SearchResult adapt(SearchResult o) { + if(o.get(FIELDS[0]).equals(EMPTY_VALUE)) { + return annotate(o); + } + return remember(o); + } + +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-11-28 00:39:53 UTC (rev 2077) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-11-28 00:41:32 UTC (rev 2078) @@ -53,6 +53,7 @@ import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import 
org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; @@ -75,12 +76,18 @@ protected SearchResultSource source; - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); + + private boolean dedupeRecords = false; private void filterRecords(CloseableIterator<SearchResult> itr, ObjectFilter<SearchResult> filter, SearchResults results, boolean forwards) throws IOException { + if(dedupeRecords) { + itr = new AdaptedIterator<SearchResult, SearchResult>(itr, + new DeduplicationSearchResultAnnotationAdapter()); + } while (itr.hasNext()) { SearchResult result = itr.next(); int ruling = filter.filterObject(result); @@ -408,4 +415,12 @@ public void setSource(SearchResultSource source) { this.source = source; } + + public boolean isDedupeRecords() { + return dedupeRecords; + } + + public void setDedupeRecords(boolean dedupeRecords) { + this.dedupeRecords = dedupeRecords; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-12-15 02:01:46
|
Revision: 2116 http://archive-access.svn.sourceforge.net/archive-access/?rev=2116&view=rev Author: bradtofel Date: 2007-12-14 18:01:51 -0800 (Fri, 14 Dec 2007) Log Message: ----------- FEATURE: added DuplicateRecordFilter to standard filter chains, to omit identical records from result stream. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-12-15 02:00:51 UTC (rev 2115) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2007-12-15 02:01:51 UTC (rev 2116) @@ -34,6 +34,7 @@ import org.archive.wayback.resourceindex.filters.CaptureToUrlResultFilter; import org.archive.wayback.resourceindex.filters.CounterFilter; import org.archive.wayback.resourceindex.filters.DateRangeFilter; +import org.archive.wayback.resourceindex.filters.DuplicateRecordFilter; import org.archive.wayback.resourceindex.filters.EndDateFilter; import org.archive.wayback.resourceindex.filters.GuardRailFilter; import org.archive.wayback.resourceindex.filters.HostMatchFilter; @@ -224,7 +225,11 @@ // use the same guardrail for both: forwardFilters.addFilter(guardrail); reverseFilters.addFilter(guardrail); - + + // BUGBUG: won't work when closest is a dupe! 
+ forwardFilters.addFilter(new DuplicateRecordFilter()); + reverseFilters.addFilter(new DuplicateRecordFilter()); + // match URL key: forwardFilters.addFilter(new UrlMatchFilter(keyUrl)); reverseFilters.addFilter(new UrlMatchFilter(keyUrl)); @@ -298,6 +303,7 @@ ObjectFilterChain<SearchResult> filters = new ObjectFilterChain<SearchResult>(); filters.addFilter(guardrail); + filters.addFilter(new DuplicateRecordFilter()); filters.addFilter(new UrlMatchFilter(keyUrl)); if(hostMatchFilter != null) { @@ -331,6 +337,7 @@ ObjectFilterChain<SearchResult> filters = new ObjectFilterChain<SearchResult>(); filters.addFilter(guardrail); + filters.addFilter(new DuplicateRecordFilter()); filters.addFilter(new UrlPrefixMatchFilter(keyUrl)); if(hostMatchFilter != null) { Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/DuplicateRecordFilter.java 2007-12-15 02:01:51 UTC (rev 2116) @@ -0,0 +1,30 @@ +package org.archive.wayback.resourceindex.filters; + +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.util.ObjectFilter; + +/** + * ObjectFilter which omits exact duplicate URL+date records from a stream + * of SearchResults. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class DuplicateRecordFilter implements ObjectFilter<SearchResult> { + private String lastUrl = null; + private String lastDate = null; + + public int filterObject(SearchResult o) { + String thisUrl = o.getUrl(); + String thisDate = o.getCaptureDate(); + int result = ObjectFilter.FILTER_INCLUDE; + if(lastUrl != null) { + if(lastUrl.equals(thisUrl) && thisDate.equals(lastDate)) { + result = FILTER_EXCLUDE; + } + } + lastUrl = thisUrl; + lastDate = thisDate; + return result; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-04-11 04:06:09
|
Revision: 2231 http://archive-access.svn.sourceforge.net/archive-access/?rev=2231&view=rev Author: bradtofel Date: 2008-04-10 21:06:12 -0700 (Thu, 10 Apr 2008) Log Message: ----------- INTERFACE: added shutdown() method to SearchResultSource interface. BDBIndex is the only implementation which uses this at present. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2008-04-11 04:05:05 UTC (rev 2230) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2008-04-11 04:06:12 UTC (rev 2231) @@ -137,4 +137,10 @@ public List<SearchResultSource> getSources() { return sources; } + + public void shutdown() throws IOException { + for(SearchResultSource source : sources) { + source.shutdown(); + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2008-04-11 04:05:05 UTC (rev 2230) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2008-04-11 04:06:12 UTC (rev 2231) @@ -62,4 +62,10 @@ * @throws IOException */ public void cleanup(CloseableIterator<SearchResult> c) throws IOException; + + /** + * Shuts down this SearchResultSource. + * @throws IOException + */ + public void shutdown() throws IOException; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-04-11 04:05:05 UTC (rev 2230) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-04-11 04:06:12 UTC (rev 2231) @@ -254,4 +254,12 @@ public void setUpdater(BDBIndexUpdater updater) { this.updater = updater; } + + public void shutdown() throws IOException { + try { + shutdownDB(); + } catch (DatabaseException e) { + throw new IOException(e.getMessage()); + } + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2008-04-11 04:05:05 UTC (rev 2230) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2008-04-11 04:06:12 UTC (rev 2231) @@ -142,4 +142,8 @@ public void cleanup(CloseableIterator<SearchResult> c) throws IOException { c.close(); } + + public void shutdown() throws IOException { + // no-op + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-24 23:40:07
|
Revision: 2309 http://archive-access.svn.sourceforge.net/archive-access/?rev=2309&view=rev Author: bradtofel Date: 2008-06-24 16:40:15 -0700 (Tue, 24 Jun 2008) Log Message: ----------- REFACTOR: moving index update code to separate package. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-06-24 23:36:30 UTC (rev 2308) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-06-24 23:40:15 UTC (rev 2309) @@ -30,18 +30,21 @@ import java.io.PrintWriter; import java.util.Iterator; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.bdb.BDBRecordSet; import org.archive.wayback.core.SearchResult; import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; -import org.archive.wayback.resourceindex.SearchResultSource; +import org.archive.wayback.resourceindex.UpdatableSearchResultSource; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; import 
org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; +import org.archive.wayback.resourceindex.updater.BDBIndexUpdater; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.flatfile.RecordIterator; +import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; import com.sleepycat.je.DatabaseException; @@ -51,7 +54,9 @@ * @author brad * @version $Date$, $Revision$ */ -public class BDBIndex extends BDBRecordSet implements SearchResultSource { +public class BDBIndex extends BDBRecordSet implements + UpdatableSearchResultSource { + private String bdbPath = null; private String bdbName = null; private BDBIndexUpdater updater = null; @@ -107,7 +112,22 @@ public void cleanup(CloseableIterator<SearchResult> c) throws IOException { c.close(); } - + + /* (non-Javadoc) + * @see org.archive.wayback.resourceindex.UpdatableSearchResultSource#addSearchResults(java.util.Iterator) + */ + public void addSearchResults(Iterator<SearchResult> itr, + UrlCanonicalizer canonicalizer) throws IOException { + Adapter<SearchResult,BDBRecord> adapterSRtoBDB = + new SearchResultToBDBRecordAdapter(canonicalizer); + + Iterator<BDBRecord> itrBDB = + new AdaptedIterator<SearchResult,BDBRecord>(itr, + adapterSRtoBDB); + + insertRecords(itrBDB); + + } private static void USAGE() { System.err.println("Usage: DBPATH DBNAME -w"); System.err.println("\tRead lines from STDIN, inserting into BDBJE at\n" + @@ -133,7 +153,7 @@ String name = args[1]; String op = args[2]; BDBIndex index = new BDBIndex(); - + UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); try { index.initializeDB(path,name); } catch (DatabaseException e) { @@ -204,14 +224,20 @@ Iterator<SearchResult> itrSR = new AdaptedIterator<String,SearchResult>(itrS,adapterStoSR); - Adapter<SearchResult,BDBRecord> adapterSRtoBDB = - new SearchResultToBDBRecordAdapter(); - - Iterator<BDBRecord> itrBDB = 
- new AdaptedIterator<SearchResult,BDBRecord>(itrSR, - adapterSRtoBDB); - - index.insertRecords(itrBDB); +// Adapter<SearchResult,BDBRecord> adapterSRtoBDB = +// new SearchResultToBDBRecordAdapter(); +// +// Iterator<BDBRecord> itrBDB = +// new AdaptedIterator<SearchResult,BDBRecord>(itrSR, +// adapterSRtoBDB); +// +// index.insertRecords(itrBDB); + try { + index.addSearchResults(itrSR, canonicalizer); + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } } else { USAGE(); } Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java 2008-06-24 23:36:30 UTC (rev 2308) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java 2008-06-24 23:40:15 UTC (rev 2309) @@ -1,399 +0,0 @@ -/* BDBIndexUpdater - * - * $Id$ - * - * Created on 2:59:40 PM Oct 12, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.bdb; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.archive.wayback.bdb.BDBRecord; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; -//import org.archive.wayback.resourcestore.ArcIndexer; -import org.archive.wayback.util.AdaptedIterator; -import org.archive.wayback.util.flatfile.FlatFile; - -/** - * Class which starts a background thread that repeatedly scans an incoming - * directory and merges files found therein(which are assumed to be in CDX - * format) with a BDBIndex. Optional configurations include: - * - * target directory where merged files are moved to (otherwise deleted) - * target directory where failed failed are moved(otherwise left in place) - * milliseconds between scans of the incoming directory(default 10000) - * - * @author brad - * @version $Date$, $Revision$ - */ -public class BDBIndexUpdater { - /** - * Logger for this class - */ - private static final Logger LOGGER = - Logger.getLogger(BDBIndexUpdater.class.getName()); - - private final static int DEFAULT_RUN_INTERVAL_MS = 10000; - - private BDBIndex index = null; - - private File incoming = null; - - private File merged = null; - - private File failed = null; - - private int runInterval = DEFAULT_RUN_INTERVAL_MS; - - /** - * Thread object of update thread -- also is flag indicating if the thread - * has already been started. Access to it is synchronized. 
- */ - private Thread updateThread = null; - - /** - * Default constructor - */ - public BDBIndexUpdater() { - - } - /** - * @param index - * @param incoming - */ - public BDBIndexUpdater(BDBIndex index, File incoming) { - this.index = index; - this.incoming = incoming; - } - - /** - * start the background index merging thread - * @throws ConfigurationException - */ - public void init() throws ConfigurationException { - if(index == null) { - throw new ConfigurationException("No index target on bdb updater"); - } - if(incoming == null) { - throw new ConfigurationException("No incoming on bdb updater"); - } - startUpdateThread(); - } - - /** Ensure the argument directory exists - * @param dir - * @throws IOException - */ - private void ensureDir(File dir) throws IOException { - if (!dir.isDirectory() && !dir.mkdirs()) { - throw new IOException("FAILED to create " + dir.getAbsolutePath()); - } - } - - /** - * start a background thread that merges new CDX files in incoming into - * the BDBIndex. - * - * @throws ConfigurationException - */ - public void startup() throws ConfigurationException { - try { - ensureDir(incoming); - if(merged != null) ensureDir(merged); - if(failed != null) ensureDir(failed); - } catch (IOException e) { - e.printStackTrace(); - throw new ConfigurationException(e.getMessage()); - } - - if (updateThread == null) { - startUpdateThread(); - } - } - - /** - * start the BDBIndexUpdaterThread thread, which will scan for new cdx files - * in the incoming directory, and add them to the BDBIndex. 
- */ - private synchronized void startUpdateThread() { - if (updateThread != null) { - return; - } - updateThread = new BDBIndexUpdaterThread(this,runInterval); - updateThread.start(); - } - - - private boolean mergeFile(File cdxFile) { - boolean added = false; - try { - FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); - AdaptedIterator<String,SearchResult> searchResultItr = - new AdaptedIterator<String,SearchResult>( - ffile.getSequentialIterator(), - new CDXLineToSearchResultAdapter()); - Iterator<BDBRecord> it = new AdaptedIterator<SearchResult,BDBRecord> - (searchResultItr,new SearchResultToBDBRecordAdapter()); - - index.insertRecords(it); - added = true; - } catch (IOException e) { - e.printStackTrace(); - } - return added; - } - - private File getTargetFile(File f, File targetDir) { - File target = new File(targetDir, f.getName()); - int x = 0; - while(target.exists()) { - if(x++ > 255) { - throw new RuntimeException("too many " - + "duplicates of file " + f.getAbsolutePath() + - " in " + targetDir.getAbsolutePath()); - } - target = new File(targetDir,f.getName() + "." + x); - } - return target; - } - - private File ensureDir(String path) throws ConfigurationException { - if(path.length() < 1) { - throw new ConfigurationException("Empty directory path"); - } - File dir = new File(path); - if(dir.exists()) { - if(!dir.isDirectory()) { - throw new ConfigurationException("path " + path + "exists" + - "but is not a directory"); - } - } else { - if(!dir.mkdirs()) { - throw new ConfigurationException("unable to create directory" + - " at " + path); - } - } - return dir; - } - - private void handleMerged(File f) { - if (merged == null) { - if (!f.delete()) { - // big problems... 
lets exit - throw new RuntimeException("Unable to delete " - + f.getAbsolutePath()); - } - LOGGER.info("Removed merged file " + f.getAbsolutePath()); - } else { - // move to merged: - File target = getTargetFile(f,merged); - if (!f.renameTo(target)) { - throw new RuntimeException("FAILED rename" + "(" - + f.getAbsolutePath() + ") to " + "(" - + target.getAbsolutePath() + ")"); - } - LOGGER.info("Renamed merged file " + f.getAbsolutePath() + " to " + - target.getAbsolutePath()); - } - } - - private void handleFailed(File f) { - if (failed == null) { - // nothing much to do.. just complain and leave it. - LOGGER.info("FAILED INDEX: " + f.getAbsolutePath()); - } else { - // move to failed: - File target = getTargetFile(f,failed); - if (!f.renameTo(target)) { - throw new RuntimeException("FAILED rename" + "(" - + f.getAbsolutePath() + ") to " + "(" - + target.getAbsolutePath() + ")"); - } - LOGGER.info("Renamed failed merge file " + f.getAbsolutePath() + - " to " + target.getAbsolutePath()); - } - } - - protected int mergeAll() { - int numMerged = 0; - File incomingFiles[] = incoming.listFiles(); - int i = 0; - for (i = 0; i < incomingFiles.length; i++) { - File f = incomingFiles[i]; - if (f.isFile()) { - if (mergeFile(f)) { - handleMerged(f); - numMerged++; - } else { - handleFailed(f); - } - } - } - return numMerged; - } - - /** - * @return the index - */ - public BDBIndex getIndex() { - return index; - } - - /** - * @param index the index to set - */ - public void setIndex(BDBIndex index) { - this.index = index; - } - - /** - * @return the incoming - */ - public String getIncoming() { - if(incoming == null) { - return null; - } - return incoming.getAbsolutePath(); - } - - /** - * @param incoming the incoming to set - * @throws ConfigurationException - */ - public void setIncoming(String incoming) throws ConfigurationException { - this.incoming = ensureDir(incoming); - } - - - /** - * @return the merged - */ - public String getMerged() { - if(merged == null) { - 
return null; - } - return merged.getAbsolutePath(); - } - - /** - * @param merged The merged to set. - * @throws ConfigurationException - */ - public void setMerged(String merged) throws ConfigurationException { - this.merged = ensureDir(merged); - } - /** - * @param merged - * @throws IOException - */ - public void setMerged(File merged) throws IOException { - ensureDir(merged); - this.merged = merged; - } - - /** - * @return the failed - */ - public String getFailed() { - if(failed == null) { - return null; - } - return failed.getAbsolutePath(); - } - - /** - * @param failed The failed to set. - * @throws ConfigurationException - */ - public void setFailed(String failed) throws ConfigurationException { - this.failed = ensureDir(failed); - } - /** - * @param failed - * @throws IOException - */ - public void setFailed(File failed) throws IOException { - ensureDir(failed); - this.failed = failed; - } - - /** - * @return the runInterval - */ - public int getRunInterval() { - return runInterval; - } - - /** - * @param runInterval The runInterval to set. - */ - public void setRunInterval(int runInterval) { - this.runInterval = runInterval; - } - /** - * Thread that repeatedly calls mergeAll on the BDBIndexUpdater. 
- * - * @author Brad Tofel - * @version $Date$, $Revision$ - */ - private class BDBIndexUpdaterThread extends Thread { - /** - * object which merges CDX files with the BDBResourceIndex - */ - private BDBIndexUpdater updater = null; - - private int runInterval; - - /** - * @param updater - * @param runInterval - */ - public BDBIndexUpdaterThread(BDBIndexUpdater updater, int runInterval) { - super("BDBIndexUpdaterThread"); - super.setDaemon(true); - this.updater = updater; - this.runInterval = runInterval; - LOGGER.info("BDBIndexUpdaterThread is alive."); - } - - public void run() { - int sleepInterval = runInterval; - while (true) { - try { - int numMerged = updater.mergeAll(); - if (numMerged == 0) { - sleep(sleepInterval); - sleepInterval += runInterval; - } else { - sleepInterval = runInterval; - } - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - } - } -} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2008-06-24 23:36:30 UTC (rev 2308) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2008-06-24 23:40:15 UTC (rev 2309) @@ -24,6 +24,7 @@ */ package org.archive.wayback.resourceindex.bdb; +import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.bdb.BDBRecordSet; @@ -47,8 +48,14 @@ BDBRecord record = new BDBRecord(key, value); + private UrlCanonicalizer canonicalizer = null; + private final static String DELIMITER = " "; + public SearchResultToBDBRecordAdapter(UrlCanonicalizer canonicalizer) { + this.canonicalizer = canonicalizer; + } + /* * 
(non-Javadoc) * Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java (from rev 2302, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndexUpdater.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java 2008-06-24 23:40:15 UTC (rev 2309) @@ -0,0 +1,401 @@ +/* BDBIndexUpdater + * + * $Id$ + * + * Created on 2:59:40 PM Oct 12, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.updater; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.archive.wayback.bdb.BDBRecord; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.resourceindex.bdb.BDBIndex; +import org.archive.wayback.resourceindex.bdb.SearchResultToBDBRecordAdapter; +import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; +//import org.archive.wayback.resourcestore.ArcIndexer; +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.flatfile.FlatFile; + +/** + * Class which starts a background thread that repeatedly scans an incoming + * directory and merges files found therein(which are assumed to be in CDX + * format) with a BDBIndex. Optional configurations include: + * + * target directory where merged files are moved to (otherwise deleted) + * target directory where failed failed are moved(otherwise left in place) + * milliseconds between scans of the incoming directory(default 10000) + * + * @author brad + * @version $Date$, $Revision$ + */ +public class BDBIndexUpdater { + /** + * Logger for this class + */ + private static final Logger LOGGER = + Logger.getLogger(BDBIndexUpdater.class.getName()); + + private final static int DEFAULT_RUN_INTERVAL_MS = 10000; + + private BDBIndex index = null; + + private File incoming = null; + + private File merged = null; + + private File failed = null; + + private int runInterval = DEFAULT_RUN_INTERVAL_MS; + + /** + * Thread object of update thread -- also is flag indicating if the thread + * has already been started. Access to it is synchronized. 
+ */ + private Thread updateThread = null; + + /** + * Default constructor + */ + public BDBIndexUpdater() { + + } + /** + * @param index + * @param incoming + */ + public BDBIndexUpdater(BDBIndex index, File incoming) { + this.index = index; + this.incoming = incoming; + } + + /** + * start the background index merging thread + * @throws ConfigurationException + */ + public void init() throws ConfigurationException { + if(index == null) { + throw new ConfigurationException("No index target on bdb updater"); + } + if(incoming == null) { + throw new ConfigurationException("No incoming on bdb updater"); + } + startUpdateThread(); + } + + /** Ensure the argument directory exists + * @param dir + * @throws IOException + */ + private void ensureDir(File dir) throws IOException { + if (!dir.isDirectory() && !dir.mkdirs()) { + throw new IOException("FAILED to create " + dir.getAbsolutePath()); + } + } + + /** + * start a background thread that merges new CDX files in incoming into + * the BDBIndex. + * + * @throws ConfigurationException + */ + public void startup() throws ConfigurationException { + try { + ensureDir(incoming); + if(merged != null) ensureDir(merged); + if(failed != null) ensureDir(failed); + } catch (IOException e) { + e.printStackTrace(); + throw new ConfigurationException(e.getMessage()); + } + + if (updateThread == null) { + startUpdateThread(); + } + } + + /** + * start the BDBIndexUpdaterThread thread, which will scan for new cdx files + * in the incoming directory, and add them to the BDBIndex. 
+ */ + private synchronized void startUpdateThread() { + if (updateThread != null) { + return; + } + updateThread = new BDBIndexUpdaterThread(this,runInterval); + updateThread.start(); + } + + + private boolean mergeFile(File cdxFile) { + boolean added = false; + try { + FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); + AdaptedIterator<String,SearchResult> searchResultItr = + new AdaptedIterator<String,SearchResult>( + ffile.getSequentialIterator(), + new CDXLineToSearchResultAdapter()); + Iterator<BDBRecord> it = new AdaptedIterator<SearchResult,BDBRecord> + (searchResultItr,new SearchResultToBDBRecordAdapter()); + + index.insertRecords(it); + added = true; + } catch (IOException e) { + e.printStackTrace(); + } + return added; + } + + private File getTargetFile(File f, File targetDir) { + File target = new File(targetDir, f.getName()); + int x = 0; + while(target.exists()) { + if(x++ > 255) { + throw new RuntimeException("too many " + + "duplicates of file " + f.getAbsolutePath() + + " in " + targetDir.getAbsolutePath()); + } + target = new File(targetDir,f.getName() + "." + x); + } + return target; + } + + private File ensureDir(String path) throws ConfigurationException { + if(path.length() < 1) { + throw new ConfigurationException("Empty directory path"); + } + File dir = new File(path); + if(dir.exists()) { + if(!dir.isDirectory()) { + throw new ConfigurationException("path " + path + "exists" + + "but is not a directory"); + } + } else { + if(!dir.mkdirs()) { + throw new ConfigurationException("unable to create directory" + + " at " + path); + } + } + return dir; + } + + private void handleMerged(File f) { + if (merged == null) { + if (!f.delete()) { + // big problems... 
lets exit + throw new RuntimeException("Unable to delete " + + f.getAbsolutePath()); + } + LOGGER.info("Removed merged file " + f.getAbsolutePath()); + } else { + // move to merged: + File target = getTargetFile(f,merged); + if (!f.renameTo(target)) { + throw new RuntimeException("FAILED rename" + "(" + + f.getAbsolutePath() + ") to " + "(" + + target.getAbsolutePath() + ")"); + } + LOGGER.info("Renamed merged file " + f.getAbsolutePath() + " to " + + target.getAbsolutePath()); + } + } + + private void handleFailed(File f) { + if (failed == null) { + // nothing much to do.. just complain and leave it. + LOGGER.info("FAILED INDEX: " + f.getAbsolutePath()); + } else { + // move to failed: + File target = getTargetFile(f,failed); + if (!f.renameTo(target)) { + throw new RuntimeException("FAILED rename" + "(" + + f.getAbsolutePath() + ") to " + "(" + + target.getAbsolutePath() + ")"); + } + LOGGER.info("Renamed failed merge file " + f.getAbsolutePath() + + " to " + target.getAbsolutePath()); + } + } + + protected int mergeAll() { + int numMerged = 0; + File incomingFiles[] = incoming.listFiles(); + int i = 0; + for (i = 0; i < incomingFiles.length; i++) { + File f = incomingFiles[i]; + if (f.isFile()) { + if (mergeFile(f)) { + handleMerged(f); + numMerged++; + } else { + handleFailed(f); + } + } + } + return numMerged; + } + + /** + * @return the index + */ + public BDBIndex getIndex() { + return index; + } + + /** + * @param index the index to set + */ + public void setIndex(BDBIndex index) { + this.index = index; + } + + /** + * @return the incoming + */ + public String getIncoming() { + if(incoming == null) { + return null; + } + return incoming.getAbsolutePath(); + } + + /** + * @param incoming the incoming to set + * @throws ConfigurationException + */ + public void setIncoming(String incoming) throws ConfigurationException { + this.incoming = ensureDir(incoming); + } + + + /** + * @return the merged + */ + public String getMerged() { + if(merged == null) { + 
return null; + } + return merged.getAbsolutePath(); + } + + /** + * @param merged The merged to set. + * @throws ConfigurationException + */ + public void setMerged(String merged) throws ConfigurationException { + this.merged = ensureDir(merged); + } + /** + * @param merged + * @throws IOException + */ + public void setMerged(File merged) throws IOException { + ensureDir(merged); + this.merged = merged; + } + + /** + * @return the failed + */ + public String getFailed() { + if(failed == null) { + return null; + } + return failed.getAbsolutePath(); + } + + /** + * @param failed The failed to set. + * @throws ConfigurationException + */ + public void setFailed(String failed) throws ConfigurationException { + this.failed = ensureDir(failed); + } + /** + * @param failed + * @throws IOException + */ + public void setFailed(File failed) throws IOException { + ensureDir(failed); + this.failed = failed; + } + + /** + * @return the runInterval + */ + public int getRunInterval() { + return runInterval; + } + + /** + * @param runInterval The runInterval to set. + */ + public void setRunInterval(int runInterval) { + this.runInterval = runInterval; + } + /** + * Thread that repeatedly calls mergeAll on the BDBIndexUpdater. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ + private class BDBIndexUpdaterThread extends Thread { + /** + * object which merges CDX files with the BDBResourceIndex + */ + private BDBIndexUpdater updater = null; + + private int runInterval; + + /** + * @param updater + * @param runInterval + */ + public BDBIndexUpdaterThread(BDBIndexUpdater updater, int runInterval) { + super("BDBIndexUpdaterThread"); + super.setDaemon(true); + this.updater = updater; + this.runInterval = runInterval; + LOGGER.info("BDBIndexUpdaterThread is alive."); + } + + public void run() { + int sleepInterval = runInterval; + while (true) { + try { + int numMerged = updater.mergeAll(); + if (numMerged == 0) { + sleep(sleepInterval); + sleepInterval += runInterval; + } else { + sleepInterval = runInterval; + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-24 23:41:31
|
Revision: 2310 http://archive-access.svn.sourceforge.net/archive-access/?rev=2310&view=rev Author: bradtofel Date: 2008-06-24 16:41:39 -0700 (Tue, 24 Jun 2008) Log Message: ----------- REFACTOR: moving index update code to separate package. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/RemoteSubmitFilter.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java 2008-06-24 23:40:15 UTC (rev 2309) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java 2008-06-24 23:41:39 UTC (rev 2310) @@ -1,204 +0,0 @@ -/* IndexClient - * - * $Id$ - * - * Created on 4:22:52 PM Oct 12, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.indexer; - -import java.io.File; -import java.io.BufferedOutputStream; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpStatus; -import org.apache.commons.httpclient.methods.InputStreamRequestEntity; -import org.apache.commons.httpclient.methods.PutMethod; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; -import org.archive.wayback.util.AdaptedIterator; -import org.archive.wayback.util.Adapter; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class IndexClient { - private static final Logger LOGGER = Logger.getLogger(IndexClient - .class.getName()); - - private String target = null; - private File tmpDir = null; - - private HttpClient client = new HttpClient(); - - /** - * @param cdx - * @return true if CDX was added to local or remote index - * @throws HttpException - * @throws IOException - */ - public boolean addCDX(File cdx) throws HttpException, IOException { - boolean added = false; - if(target == null) { - throw new IOException("No target set"); - } - String base = cdx.getName(); - if(target.startsWith("http://")) { - String finalUrl = target; - if(target.endsWith("/")) { - finalUrl = target + base; - } else { - finalUrl = target + "/" + base; - } - PutMethod method = new PutMethod(finalUrl); - method.setRequestEntity(new InputStreamRequestEntity( - new FileInputStream(cdx))); - - int statusCode = client.executeMethod(method); 
- if (statusCode == HttpStatus.SC_OK) { - LOGGER.info("Uploaded cdx " + cdx.getAbsolutePath() + " to " + - finalUrl); - if(!cdx.delete()) { - throw new IOException("FAILED delete " + - cdx.getAbsolutePath()); - } - - added = true; - } else { - throw new IOException("Method failed: " + method.getStatusLine() - + " for URL " + finalUrl + " on file " - + cdx.getAbsolutePath()); - } - - } else { - // assume a local directory: - File toBeMergedDir = new File(target); - if(!toBeMergedDir.exists()) { - toBeMergedDir.mkdirs(); - } - if(!toBeMergedDir.exists()) { - throw new IOException("Target " + target + " does not exist"); - } - if(!toBeMergedDir.isDirectory()) { - throw new IOException("Target " + target + " is not a dir"); - } - if(!toBeMergedDir.canWrite()) { - throw new IOException("Target " + target + " is not writable"); - } - File toBeMergedFile = new File(toBeMergedDir,base); - if(toBeMergedFile.exists()) { - LOGGER.severe("WARNING: "+toBeMergedFile.getAbsolutePath() + - "already exists!"); - } else { - if(cdx.renameTo(toBeMergedFile)) { - LOGGER.info("Queued " + toBeMergedFile.getAbsolutePath() + - " for merging."); - added = true; - } else { - LOGGER.severe("FAILED rename("+cdx.getAbsolutePath()+ - ") to ("+toBeMergedFile.getAbsolutePath()+")"); - } - } - } - return added; - } - - /** - * @param base - * @param itr - * @return true if data was added to local or remote index - * @throws HttpException - * @throws IOException - */ - public boolean addSearchResults(String base, Iterator<SearchResult> itr) - throws HttpException, IOException { - - if(tmpDir == null) { - throw new IOException("No tmpDir argument"); - } - File tmpFile = new File(tmpDir,base); - if(tmpFile.exists()) { - // TODO: is this safe? 
- if(!tmpFile.delete()) { - throw new IOException("Unable to remove tmp " + - tmpFile.getAbsolutePath()); - } - } - FileOutputStream os = new FileOutputStream(tmpFile); - BufferedOutputStream bos = new BufferedOutputStream(os); - PrintWriter pw = new PrintWriter(bos); - - Adapter<SearchResult,String> adapterSRtoS = - new SearchResultToCDXLineAdapter(); - Iterator<String> itrS = - new AdaptedIterator<SearchResult,String>(itr,adapterSRtoS); - - while(itrS.hasNext()) { - pw.println(itrS.next()); - } - pw.close(); - boolean added = addCDX(tmpFile); - return added; - } - - /** - * @return the target - */ - public String getTarget() { - return target; - } - - /** - * @param target the target to set - */ - public void setTarget(String target) { - this.target = target; - } - - /** - * @return the tmpDir - */ - public String getTmpDir() { - if(tmpDir == null) { - return null; - } - return tmpDir.getAbsolutePath(); - } - - /** - * @param tmpDir the tmpDir to set - */ - public void setTmpDir(String tmpDir) { - this.tmpDir = new File(tmpDir); - if(!this.tmpDir.isDirectory()) { - this.tmpDir.mkdirs(); - } - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/RemoteSubmitFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/RemoteSubmitFilter.java 2008-06-24 23:40:15 UTC (rev 2309) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/RemoteSubmitFilter.java 2008-06-24 23:41:39 UTC (rev 2310) @@ -1,187 +0,0 @@ -/* RemoteSubmitFilter - * - * $Id$ - * - * Created on 3:57:00 PM Oct 12, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. 
- * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.indexer; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.util.Enumeration; -import java.util.Properties; - -import javax.servlet.Filter; -import javax.servlet.FilterChain; -import javax.servlet.FilterConfig; -import javax.servlet.ServletContext; -import javax.servlet.ServletException; -import javax.servlet.ServletRequest; -import javax.servlet.ServletResponse; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -/** - * Filter that accepts PUT HTTP requests to insert CDX files into the incoming - * directory for a local BDBIndex. 
- * - * @author brad - * @version $Date$, $Revision$ - */ -public class RemoteSubmitFilter implements Filter { - - private final static String INCOMING_PATH = "config-tmp.incoming"; - private final static String HTTP_PUT_METHOD = "PUT"; - private File incoming = null; - private File tmpIncoming = null; - - // TODO: get rid of this - @SuppressWarnings("unchecked") - public void init(FilterConfig c) throws ServletException { - - Properties p = new Properties(); - ServletContext sc = c.getServletContext(); - for (Enumeration e = sc.getInitParameterNames(); e.hasMoreElements();) { - String key = (String) e.nextElement(); - p.put(key, sc.getInitParameter(key)); - } - for (Enumeration e = c.getInitParameterNames(); e.hasMoreElements();) { - String key = (String) e.nextElement(); - p.put(key, c.getInitParameter(key)); - } - - String cfgName = INCOMING_PATH; - String incomingPath = p.getProperty(cfgName); - if((incomingPath == null) || incomingPath.length() == 0) { - throw new ServletException("Invalid or missing " + cfgName + - " configuration"); - } - incoming = new File(incomingPath); - tmpIncoming = new File(incoming,"tmp"); - try { - ensureDir(incoming); - ensureDir(tmpIncoming); - } catch (IOException e) { - throw new ServletException(e); - } - } - private void ensureDir(File dir) throws IOException { - if(dir.exists()) { - if(!dir.isDirectory()) { - throw new IOException("Path " + dir.getAbsolutePath() + - "exists but is not a directory."); - } - } else { - if(!dir.mkdirs()) { - throw new IOException("FAILED mkdir " + dir.getAbsolutePath()); - } - } - } - - /* - * (non-Javadoc) - * - * @see javax.servlet.Filter#doFilter(javax.servlet.ServletRequest, - * javax.servlet.ServletResponse, javax.servlet.FilterChain) - */ - public void doFilter(ServletRequest request, ServletResponse response, - FilterChain chain) throws IOException, ServletException { - if (!handle(request, response)) { - chain.doFilter(request, response); - } - } - /** - * @param request - * @param 
response - * @return boolean, true unless something went wrong.. - * @throws IOException - * @throws ServletException - */ - protected boolean handle(final ServletRequest request, - final ServletResponse response) throws IOException, - ServletException { - if (!(request instanceof HttpServletRequest)) { - return false; - } - if (!(response instanceof HttpServletResponse)) { - return false; - } - HttpServletRequest httpRequest = (HttpServletRequest) request; - if(httpRequest.getMethod().equals(HTTP_PUT_METHOD)) { - - return handlePut(httpRequest,response); - - } - return false; - } - - protected boolean handlePut(final HttpServletRequest request, - final ServletResponse response) throws IOException, - ServletException { - - String reqURI = request.getRequestURI(); - int lastSlashIdx = reqURI.lastIndexOf("/"); - if (lastSlashIdx == -1) { - return false; - } - String targetFileName = reqURI.substring(lastSlashIdx + 1); - String tmpFileName = targetFileName + ".tmp"; - File tmpFile = new File(tmpIncoming,tmpFileName); - File targetFile = new File(incoming, targetFileName); - - int i; - InputStream input; - input = request.getInputStream(); - BufferedInputStream in = new BufferedInputStream(input); - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - FileWriter out = new FileWriter(tmpFile); - - while ((i = reader.read()) != -1) { - out.write(i); - } - - out.close(); - in.close(); - if (!tmpFile.renameTo(targetFile)) { - throw new IOException("Unable to rename " - + tmpFile.getAbsolutePath() + " to " - + targetFile.getAbsolutePath()); - } - - PrintWriter outHTML = response.getWriter(); - outHTML.println("done"); - return true; - } - - /* (non-Javadoc) - * @see javax.servlet.Filter#destroy() - */ - public void destroy() { - // TODO Auto-generated method stub - - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java (from rev 2302, 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/IndexClient.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/IndexClient.java 2008-06-24 23:41:39 UTC (rev 2310) @@ -0,0 +1,204 @@ +/* IndexClient + * + * $Id$ + * + * Created on 4:22:52 PM Oct 12, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.updater; + +import java.io.File; +import java.io.BufferedOutputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpStatus; +import org.apache.commons.httpclient.methods.InputStreamRequestEntity; +import org.apache.commons.httpclient.methods.PutMethod; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.Adapter; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class IndexClient { + private static final Logger LOGGER = Logger.getLogger(IndexClient + .class.getName()); + + private String target = null; + private File tmpDir = null; + + private HttpClient client = new HttpClient(); + + /** + * @param cdx + * @return true if CDX was added to local or remote index + * @throws HttpException + * @throws IOException + */ + public boolean addCDX(File cdx) throws HttpException, IOException { + boolean added = false; + if(target == null) { + throw new IOException("No target set"); + } + String base = cdx.getName(); + if(target.startsWith("http://")) { + String finalUrl = target; + if(target.endsWith("/")) { + finalUrl = target + base; + } else { + finalUrl = target + "/" + base; + } + PutMethod method = new PutMethod(finalUrl); + method.setRequestEntity(new InputStreamRequestEntity( + new FileInputStream(cdx))); + + int statusCode = client.executeMethod(method); 
+ if (statusCode == HttpStatus.SC_OK) { + LOGGER.info("Uploaded cdx " + cdx.getAbsolutePath() + " to " + + finalUrl); + if(!cdx.delete()) { + throw new IOException("FAILED delete " + + cdx.getAbsolutePath()); + } + + added = true; + } else { + throw new IOException("Method failed: " + method.getStatusLine() + + " for URL " + finalUrl + " on file " + + cdx.getAbsolutePath()); + } + + } else { + // assume a local directory: + File toBeMergedDir = new File(target); + if(!toBeMergedDir.exists()) { + toBeMergedDir.mkdirs(); + } + if(!toBeMergedDir.exists()) { + throw new IOException("Target " + target + " does not exist"); + } + if(!toBeMergedDir.isDirectory()) { + throw new IOException("Target " + target + " is not a dir"); + } + if(!toBeMergedDir.canWrite()) { + throw new IOException("Target " + target + " is not writable"); + } + File toBeMergedFile = new File(toBeMergedDir,base); + if(toBeMergedFile.exists()) { + LOGGER.severe("WARNING: "+toBeMergedFile.getAbsolutePath() + + "already exists!"); + } else { + if(cdx.renameTo(toBeMergedFile)) { + LOGGER.info("Queued " + toBeMergedFile.getAbsolutePath() + + " for merging."); + added = true; + } else { + LOGGER.severe("FAILED rename("+cdx.getAbsolutePath()+ + ") to ("+toBeMergedFile.getAbsolutePath()+")"); + } + } + } + return added; + } + + /** + * @param base + * @param itr + * @return true if data was added to local or remote index + * @throws HttpException + * @throws IOException + */ + public boolean addSearchResults(String base, Iterator<SearchResult> itr) + throws HttpException, IOException { + + if(tmpDir == null) { + throw new IOException("No tmpDir argument"); + } + File tmpFile = new File(tmpDir,base); + if(tmpFile.exists()) { + // TODO: is this safe? 
+ if(!tmpFile.delete()) { + throw new IOException("Unable to remove tmp " + + tmpFile.getAbsolutePath()); + } + } + FileOutputStream os = new FileOutputStream(tmpFile); + BufferedOutputStream bos = new BufferedOutputStream(os); + PrintWriter pw = new PrintWriter(bos); + + Adapter<SearchResult,String> adapterSRtoS = + new SearchResultToCDXLineAdapter(); + Iterator<String> itrS = + new AdaptedIterator<SearchResult,String>(itr,adapterSRtoS); + + while(itrS.hasNext()) { + pw.println(itrS.next()); + } + pw.close(); + boolean added = addCDX(tmpFile); + return added; + } + + /** + * @return the target + */ + public String getTarget() { + return target; + } + + /** + * @param target the target to set + */ + public void setTarget(String target) { + this.target = target; + } + + /** + * @return the tmpDir + */ + public String getTmpDir() { + if(tmpDir == null) { + return null; + } + return tmpDir.getAbsolutePath(); + } + + /** + * @param tmpDir the tmpDir to set + */ + public void setTmpDir(String tmpDir) { + this.tmpDir = new File(tmpDir); + if(!this.tmpDir.isDirectory()) { + this.tmpDir.mkdirs(); + } + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java (from rev 2302, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/indexer/RemoteSubmitFilter.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java 2008-06-24 23:41:39 UTC (rev 2310) @@ -0,0 +1,187 @@ +/* RemoteSubmitFilter + * + * $Id$ + * + * Created on 3:57:00 PM Oct 12, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. 
+ * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.updater; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.PrintWriter; +import java.util.Enumeration; +import java.util.Properties; + +import javax.servlet.Filter; +import javax.servlet.FilterChain; +import javax.servlet.FilterConfig; +import javax.servlet.ServletContext; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * Filter that accepts PUT HTTP requests to insert CDX files into the incoming + * directory for a local BDBIndex. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class RemoteSubmitFilter implements Filter { + + private final static String INCOMING_PATH = "config-tmp.incoming"; + private final static String HTTP_PUT_METHOD = "PUT"; + private File incoming = null; + private File tmpIncoming = null; + + // TODO: get rid of this + @SuppressWarnings("unchecked") + public void init(FilterConfig c) throws ServletException { + + Properties p = new Properties(); + ServletContext sc = c.getServletContext(); + for (Enumeration e = sc.getInitParameterNames(); e.hasMoreElements();) { + String key = (String) e.nextElement(); + p.put(key, sc.getInitParameter(key)); + } + for (Enumeration e = c.getInitParameterNames(); e.hasMoreElements();) { + String key = (String) e.nextElement(); + p.put(key, c.getInitParameter(key)); + } + + String cfgName = INCOMING_PATH; + String incomingPath = p.getProperty(cfgName); + if((incomingPath == null) || incomingPath.length() == 0) { + throw new ServletException("Invalid or missing " + cfgName + + " configuration"); + } + incoming = new File(incomingPath); + tmpIncoming = new File(incoming,"tmp"); + try { + ensureDir(incoming); + ensureDir(tmpIncoming); + } catch (IOException e) { + throw new ServletException(e); + } + } + private void ensureDir(File dir) throws IOException { + if(dir.exists()) { + if(!dir.isDirectory()) { + throw new IOException("Path " + dir.getAbsolutePath() + + "exists but is not a directory."); + } + } else { + if(!dir.mkdirs()) { + throw new IOException("FAILED mkdir " + dir.getAbsolutePath()); + } + } + } + + /* + * (non-Javadoc) + * + * @see javax.servlet.Filter#doFilter(javax.servlet.ServletRequest, + * javax.servlet.ServletResponse, javax.servlet.FilterChain) + */ + public void doFilter(ServletRequest request, ServletResponse response, + FilterChain chain) throws IOException, ServletException { + if (!handle(request, response)) { + chain.doFilter(request, response); + } + } + /** + * @param request + * @param 
response + * @return boolean, true unless something went wrong.. + * @throws IOException + * @throws ServletException + */ + protected boolean handle(final ServletRequest request, + final ServletResponse response) throws IOException, + ServletException { + if (!(request instanceof HttpServletRequest)) { + return false; + } + if (!(response instanceof HttpServletResponse)) { + return false; + } + HttpServletRequest httpRequest = (HttpServletRequest) request; + if(httpRequest.getMethod().equals(HTTP_PUT_METHOD)) { + + return handlePut(httpRequest,response); + + } + return false; + } + + protected boolean handlePut(final HttpServletRequest request, + final ServletResponse response) throws IOException, + ServletException { + + String reqURI = request.getRequestURI(); + int lastSlashIdx = reqURI.lastIndexOf("/"); + if (lastSlashIdx == -1) { + return false; + } + String targetFileName = reqURI.substring(lastSlashIdx + 1); + String tmpFileName = targetFileName + ".tmp"; + File tmpFile = new File(tmpIncoming,tmpFileName); + File targetFile = new File(incoming, targetFileName); + + int i; + InputStream input; + input = request.getInputStream(); + BufferedInputStream in = new BufferedInputStream(input); + BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + FileWriter out = new FileWriter(tmpFile); + + while ((i = reader.read()) != -1) { + out.write(i); + } + + out.close(); + in.close(); + if (!tmpFile.renameTo(targetFile)) { + throw new IOException("Unable to rename " + + tmpFile.getAbsolutePath() + " to " + + targetFile.getAbsolutePath()); + } + + PrintWriter outHTML = response.getWriter(); + outHTML.println("done"); + return true; + } + + /* (non-Javadoc) + * @see javax.servlet.Filter#destroy() + */ + public void destroy() { + // TODO Auto-generated method stub + + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:33:26
|
Revision: 2364 http://archive-access.svn.sourceforge.net/archive-access/?rev=2364&view=rev Author: bradtofel Date: 2008-07-01 16:33:35 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -33,7 +33,7 @@ import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.bdb.BDBRecord; import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import 
org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.UpdatableSearchResultSource; @@ -66,9 +66,10 @@ initializeDB(bdbPath,bdbName); } - private CloseableIterator<SearchResult> adaptIterator( + private CloseableIterator<CaptureSearchResult> adaptIterator( Iterator<BDBRecord> itr) { - return new AdaptedIterator<BDBRecord,SearchResult>(itr,new BDBRecordToSearchResultAdapter()); + return new AdaptedIterator<BDBRecord,CaptureSearchResult>(itr, + new BDBRecordToSearchResultAdapter()); } /* @@ -76,7 +77,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { try { @@ -91,7 +92,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(recordIterator(prefix,false)); @@ -103,20 +104,20 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException { + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException { c.close(); } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.UpdatableSearchResultSource#addSearchResults(java.util.Iterator) */ - public void addSearchResults(Iterator<SearchResult> itr, + public void addSearchResults(Iterator<CaptureSearchResult> itr, UrlCanonicalizer canonicalizer) throws IOException { - Adapter<SearchResult,BDBRecord> 
adapterSRtoBDB = + Adapter<CaptureSearchResult,BDBRecord> adapterSRtoBDB = new SearchResultToBDBRecordAdapter(canonicalizer); Iterator<BDBRecord> itrBDB = - new AdaptedIterator<SearchResult,BDBRecord>(itr, + new AdaptedIterator<CaptureSearchResult,BDBRecord>(itr, adapterSRtoBDB); insertRecords(itrBDB); @@ -158,8 +159,8 @@ if(op.compareTo("-r") == 0) { PrintWriter pw = new PrintWriter(System.out); - CloseableIterator<SearchResult> itrSR = null; - Adapter<SearchResult,String> adapter = + CloseableIterator<CaptureSearchResult> itrSR = null; + Adapter<CaptureSearchResult,String> adapter = new SearchResultToCDXLineAdapter(); CloseableIterator<String> itrS; @@ -171,7 +172,7 @@ e.printStackTrace(); System.exit(1); } - itrS = new AdaptedIterator<SearchResult,String>(itrSR,adapter); + itrS = new AdaptedIterator<CaptureSearchResult,String>(itrSR,adapter); while(itrS.hasNext()) { String line = itrS.next(); if(!line.startsWith(prefix)) { @@ -187,7 +188,7 @@ e.printStackTrace(); System.exit(1); } - itrS = new AdaptedIterator<SearchResult,String>(itrSR,adapter); + itrS = new AdaptedIterator<CaptureSearchResult,String>(itrSR,adapter); while(itrS.hasNext()) { pw.println(itrS.next()); @@ -212,11 +213,11 @@ RecordIterator itrS = new RecordIterator(br); - Adapter<String,SearchResult> adapterStoSR = + Adapter<String,CaptureSearchResult> adapterStoSR = new CDXLineToSearchResultAdapter(); - Iterator<SearchResult> itrSR = - new AdaptedIterator<String,SearchResult>(itrS,adapterStoSR); + Iterator<CaptureSearchResult> itrSR = + new AdaptedIterator<String,CaptureSearchResult>(itrS,adapterStoSR); try { index.addSearchResults(itrSR, canonicalizer); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -27,7 +27,7 @@ import java.io.UnsupportedEncodingException; import org.archive.wayback.bdb.BDBRecord; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; import org.archive.wayback.util.Adapter; @@ -38,7 +38,7 @@ * @version $Date$, $Revision$ */ public class BDBRecordToSearchResultAdapter - implements Adapter<BDBRecord,SearchResult> { + implements Adapter<BDBRecord,CaptureSearchResult> { private static int DEFAULT_SB_SIZE = 100; private StringBuilder sb; @@ -53,7 +53,7 @@ * @param record * @return SearchResult representation of input BDBRecord */ - public SearchResult adapt(BDBRecord record) { + public CaptureSearchResult adapt(BDBRecord record) { sb.setLength(0); try { String key = new String(record.getKey().getData(),"UTF-8"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/SearchResultToBDBRecordAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -28,10 +28,9 @@ import org.apache.commons.httpclient.URIException; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.bdb.BDBRecord; 
import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; import com.sleepycat.je.DatabaseEntry; @@ -43,7 +42,7 @@ * @version $Date$, $Revision$ */ public class SearchResultToBDBRecordAdapter implements - Adapter<SearchResult,BDBRecord> { + Adapter<CaptureSearchResult,BDBRecord> { private static final Logger LOGGER = Logger.getLogger(SearchResultToBDBRecordAdapter.class.getName()); @@ -66,11 +65,11 @@ * * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public BDBRecord adapt(SearchResult result) { + public BDBRecord adapt(CaptureSearchResult result) { StringBuilder keySB = new StringBuilder(40); StringBuilder valSB = new StringBuilder(100); - String origUrl = result.getAbsoluteUrl(); + String origUrl = result.getOriginalUrl(); String urlKey; try { urlKey = canonicalizer.urlStringToKey(origUrl); @@ -81,22 +80,22 @@ } keySB.append(urlKey); keySB.append(DELIMITER); - keySB.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + keySB.append(result.getCaptureTimestamp()); keySB.append(DELIMITER); - keySB.append(result.get(WaybackConstants.RESULT_OFFSET)); + keySB.append(result.getOffset()); keySB.append(DELIMITER); - keySB.append(result.get(WaybackConstants.RESULT_ARC_FILE)); + keySB.append(result.getFile()); - valSB.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); + valSB.append(result.getOriginalUrl()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); + valSB.append(result.getMimeType()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); + valSB.append(result.getHttpCode()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); + valSB.append(result.getDigest()); valSB.append(DELIMITER); - valSB.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); + valSB.append(result.getRedirectUrl()); 
key.setData(BDBRecordSet.stringToBytes(keySB.toString())); value.setData(BDBRecordSet.stringToBytes(valSB.toString())); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXIndex.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -26,10 +26,9 @@ import java.io.IOException; import java.util.Comparator; +import java.util.Date; import java.util.Iterator; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.core.Timestamp; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.SearchResultSource; import org.archive.wayback.util.AdaptedIterator; @@ -50,15 +49,15 @@ */ private static final long serialVersionUID = 1L; - private CloseableIterator<SearchResult> adaptIterator(Iterator<String> itr) { - return new AdaptedIterator<String,SearchResult>(itr, + private CloseableIterator<CaptureSearchResult> adaptIterator(Iterator<String> itr) { + return new AdaptedIterator<String,CaptureSearchResult>(itr, new CDXLineToSearchResultAdapter()); } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(getRecordIterator(prefix)); @@ -70,7 +69,7 @@ /* (non-Javadoc) * @see 
org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { try { return adaptIterator(getReverseRecordIterator(prefix)); @@ -82,10 +81,10 @@ /** * @param prefix - * @return Iterator of SearchResults of records starting with prefix + * @return Iterator of CaptureSearchResult of records starting with prefix * @throws IOException */ - public Iterator<SearchResult> getUrlIterator(final String prefix) throws IOException { + public Iterator<CaptureSearchResult> getUrlIterator(final String prefix) throws IOException { return adaptIterator(getRecordIterator(prefix)); } @@ -95,38 +94,36 @@ * @return Iterator of results in closest order to wantTS * @throws IOException */ - public Iterator<SearchResult> getClosestIterator(final String prefix, - final Timestamp wantTS) throws IOException { + public Iterator<CaptureSearchResult> getClosestIterator(final String prefix, + final Date wantDate) throws IOException { - Iterator<SearchResult> forwardItr = adaptIterator(getRecordIterator(prefix)); - Iterator<SearchResult> reverseItr = adaptIterator(getReverseRecordIterator(prefix)); - Comparator<SearchResult> comparator = new TimestampComparator(wantTS); - CompositeSortedIterator<SearchResult> itr = - new CompositeSortedIterator<SearchResult>(comparator); + Iterator<CaptureSearchResult> forwardItr = adaptIterator(getRecordIterator(prefix)); + Iterator<CaptureSearchResult> reverseItr = adaptIterator(getReverseRecordIterator(prefix)); + Comparator<CaptureSearchResult> comparator = new CaptureSRComparator(wantDate); + CompositeSortedIterator<CaptureSearchResult> itr = + new CompositeSortedIterator<CaptureSearchResult>(comparator); itr.addComponent(forwardItr); itr.addComponent(reverseItr); return itr; } - private class TimestampComparator 
implements Comparator<SearchResult> { - private int wantedSSE; + private class CaptureSRComparator implements Comparator<CaptureSearchResult> { + private long wantTime; /** * @param wanted */ - public TimestampComparator(Timestamp wanted) { - wantedSSE = wanted.sse(); + public CaptureSRComparator(Date wanted) { + wantTime = wanted.getTime(); } - private int searchResultToDistance(SearchResult sr) { - String dateStr = sr.get(WaybackConstants.RESULT_CAPTURE_DATE); - Timestamp ts = new Timestamp(dateStr); - return Math.abs(wantedSSE - ts.sse()); + private long searchResultToDistance(CaptureSearchResult sr) { + return Math.abs(wantTime - sr.getCaptureDate().getTime()); } /* (non-Javadoc) * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ - public int compare(SearchResult o1, SearchResult o2) { - int d1 = searchResultToDistance(o1); - int d2 = searchResultToDistance(o2); + public int compare(CaptureSearchResult o1, CaptureSearchResult o2) { + long d1 = searchResultToDistance(o1); + long d2 = searchResultToDistance(o2); if(d1 < d2) { return -1; } else if(d1 > d2) { @@ -139,7 +136,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException { + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException { c.close(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ 
-25,76 +25,53 @@ package org.archive.wayback.resourceindex.cdx; -import org.apache.commons.httpclient.URIException; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; /** - * Adapter that converts a CDX record String into a SearchResult + * Adapter that converts a CDX record String into a CaptureSearchResult * * @author brad * @version $Date$, $Revision$ */ -public class CDXLineToSearchResultAdapter implements Adapter<String,SearchResult> { +public class CDXLineToSearchResultAdapter implements Adapter<String,CaptureSearchResult> { - public SearchResult adapt(String line) { + public CaptureSearchResult adapt(String line) { return doAdapt(line); } /** * @param line * @return SearchResult representation of input line */ - public static SearchResult doAdapt(String line) { - SearchResult result = new SearchResult(); + public static CaptureSearchResult doAdapt(String line) { + CaptureSearchResult result = new CaptureSearchResult(); String[] tokens = line.split(" "); if (tokens.length != 9) { return null; //throw new IllegalArgumentException("Need 9 columns("+line+")"); } - String url = tokens[0]; - String captureDate = tokens[1]; - String origHost = tokens[2]; + String urlKey = tokens[0]; + String captureTS = tokens[1]; + String originalUrl = tokens[2]; String mimeType = tokens[3]; - String httpResponseCode = tokens[4]; - String md5Fragment = tokens[5]; + String httpCode = tokens[4]; + String digest = tokens[5]; String redirectUrl = tokens[6]; long compressedOffset = -1; if(!tokens[7].equals("-")) { compressedOffset = Long.parseLong(tokens[7]); } - String arcFileName = tokens[8]; + String fileName = tokens[8]; + result.setUrlKey(urlKey); + result.setCaptureTimestamp(captureTS); + result.setOriginalUrl(originalUrl); + result.setMimeType(mimeType); + 
result.setHttpCode(httpCode); + result.setDigest(digest); + result.setRedirectUrl(redirectUrl); + result.setOffset(compressedOffset); + result.setFile(fileName); - String origUrl = url; - if(!url.startsWith(WaybackConstants.DNS_URL_PREFIX)) { - try { - UURI uri = UURIFactory.getInstance( - WaybackConstants.HTTP_URL_PREFIX + url); - if(uri.getPort() != -1) { - origHost += ":" + uri.getPort(); - } - origUrl = origHost + uri.getEscapedPathQuery(); - } catch (URIException e) { - // TODO Stifle? throw an error? - e.printStackTrace(); - return null; - } - } - - result.put(WaybackConstants.RESULT_URL, origUrl); - result.put(WaybackConstants.RESULT_URL_KEY, url); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, captureDate); - result.put(WaybackConstants.RESULT_ORIG_HOST, origHost); - result.put(WaybackConstants.RESULT_MIME_TYPE, mimeType); - result.put(WaybackConstants.RESULT_HTTP_CODE, httpResponseCode); - result.put(WaybackConstants.RESULT_MD5_DIGEST, md5Fragment); - result.put(WaybackConstants.RESULT_REDIRECT_URL, redirectUrl); - // HACKHACK: - result.put(WaybackConstants.RESULT_OFFSET, String.valueOf(compressedOffset)); - result.put(WaybackConstants.RESULT_ARC_FILE, arcFileName); - return result; } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -26,8 +26,7 @@ import java.util.Iterator; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import 
org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; @@ -38,7 +37,7 @@ * @version $Date$, $Revision$ */ public class SearchResultToCDXLineAdapter implements -Adapter<SearchResult,String>{ +Adapter<CaptureSearchResult,String>{ private static int DEFAULT_CAPACITY = 120; private final static String DELIMITER = " "; @@ -46,33 +45,33 @@ /* (non-Javadoc) * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ - public String adapt(SearchResult result) { + public String adapt(CaptureSearchResult result) { StringBuilder sb = new StringBuilder(DEFAULT_CAPACITY); - sb.append(result.get(WaybackConstants.RESULT_URL_KEY)); + sb.append(result.getUrlKey()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_CAPTURE_DATE)); + sb.append(result.getCaptureTimestamp()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_ORIG_HOST)); + sb.append(result.getOriginalUrl()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_MIME_TYPE)); + sb.append(result.getMimeType()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_HTTP_CODE)); + sb.append(result.getHttpCode()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_MD5_DIGEST)); + sb.append(result.getDigest()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_REDIRECT_URL)); + sb.append(result.getRedirectUrl()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_OFFSET)); + sb.append(result.getOffset()); sb.append(DELIMITER); - sb.append(result.get(WaybackConstants.RESULT_ARC_FILE)); + sb.append(result.getFile()); return sb.toString(); } - public static Iterator<String> adapt(Iterator<SearchResult> input) { - return new AdaptedIterator<SearchResult,String>(input, + public static Iterator<String> adapt(Iterator<CaptureSearchResult> input) { + return new AdaptedIterator<CaptureSearchResult,String>(input, new SearchResultToCDXLineAdapter()); } } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java 2008-07-01 23:32:29 UTC (rev 2363) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/dynamic/DynamicCDXIndex.java 2008-07-01 23:33:35 UTC (rev 2364) @@ -35,7 +35,7 @@ import java.util.logging.Logger; import java.util.regex.Pattern; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.FileDownloader; @@ -151,7 +151,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { if(getState() != STATE_SYNCHED) { throw new ResourceIndexNotAvailableException("Not synchronized"); @@ -164,7 +164,7 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(String prefix) throws ResourceIndexNotAvailableException { if(getState() != STATE_SYNCHED) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:34:43
|
Revision: 2365 http://archive-access.svn.sourceforge.net/archive-access/?rev=2365&view=rev Author: bradtofel Date: 2008-07-01 16:34:52 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/CompositeSearchResultSource.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -29,7 +29,7 @@ import java.util.Comparator; import java.util.List; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.cdx.CDXIndex; import org.archive.wayback.util.CloseableIterator; @@ -67,12 +67,12 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixIterator(java.lang.String) */ - public CloseableIterator<SearchResult> 
getPrefixIterator(String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(String prefix) throws ResourceIndexNotAvailableException { - Comparator<SearchResult> comparator = new SearchResultComparator(); - CompositeSortedIterator<SearchResult> itr = - new CompositeSortedIterator<SearchResult>(comparator); + Comparator<CaptureSearchResult> comparator = new SearchResultComparator(); + CompositeSortedIterator<CaptureSearchResult> itr = + new CompositeSortedIterator<CaptureSearchResult>(comparator); for (int i = 0; i < sources.size(); i++) { itr.addComponent(sources.get(i).getPrefixIterator(prefix)); } @@ -84,12 +84,12 @@ * * @see org.archive.wayback.resourceindex.SearchResultSource#getPrefixReverseIterator(java.lang.String) */ - public CloseableIterator<SearchResult> getPrefixReverseIterator( + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator( String prefix) throws ResourceIndexNotAvailableException { - Comparator<SearchResult> comparator = new SearchResultComparator(true); - CompositeSortedIterator<SearchResult> itr = - new CompositeSortedIterator<SearchResult>(comparator); + Comparator<CaptureSearchResult> comparator = new SearchResultComparator(true); + CompositeSortedIterator<CaptureSearchResult> itr = + new CompositeSortedIterator<CaptureSearchResult>(comparator); for (int i = 0; i < sources.size(); i++) { itr.addComponent(sources.get(i).getPrefixReverseIterator(prefix)); } @@ -99,7 +99,7 @@ /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultSource#cleanup(org.archive.wayback.util.CleanableIterator) */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException{ + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException{ c.close(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/DeduplicationSearchResultAnnotationAdapter.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -2,8 +2,7 @@ import java.util.HashMap; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.Adapter; /** @@ -25,46 +24,38 @@ * @version $Date$, $Revision$ */ public class DeduplicationSearchResultAnnotationAdapter -implements Adapter<SearchResult,SearchResult> { +implements Adapter<CaptureSearchResult,CaptureSearchResult> { private final static String EMPTY_VALUE = "-"; - // these fields are all copied to deduped records as-is: - private final static String FIELDS[] = { - WaybackConstants.RESULT_ARC_FILE, - WaybackConstants.RESULT_OFFSET, - WaybackConstants.RESULT_HTTP_CODE, - WaybackConstants.RESULT_MIME_TYPE, - WaybackConstants.RESULT_REDIRECT_URL, - }; - private HashMap<String,SearchResult> memory = null; + private HashMap<String,CaptureSearchResult> memory = null; public DeduplicationSearchResultAnnotationAdapter() { - memory = new HashMap<String,SearchResult>(); + memory = new HashMap<String,CaptureSearchResult>(); } - private SearchResult annotate(SearchResult o) { - String thisDigest = o.get(WaybackConstants.RESULT_MD5_DIGEST); - SearchResult last = memory.get(thisDigest); + private CaptureSearchResult annotate(CaptureSearchResult o) { + String thisDigest = o.getDigest(); + CaptureSearchResult last = memory.get(thisDigest); if(last == null) { + // TODO: log missing record digest reference return null; } - for(String field : FIELDS) { - o.put(field, last.get(field)); - } - o.put(WaybackConstants.RESULT_DUPLICATE_ANNOTATION, - WaybackConstants.RESULT_DUPLICATE_DIGEST); - 
o.put(WaybackConstants.RESULT_DUPLICATE_STORED_DATE, - last.get(WaybackConstants.RESULT_CAPTURE_DATE)); + o.setFile(last.getFile()); + o.setOffset(last.getOffset()); + o.setHttpCode(last.getHttpCode()); + o.setMimeType(last.getMimeType()); + o.setRedirectUrl(last.getRedirectUrl()); + o.flagDuplicateDigest(last.getCaptureTimestamp()); return o; } - private SearchResult remember(SearchResult o) { - memory.put(o.get(WaybackConstants.RESULT_MD5_DIGEST),o); + private CaptureSearchResult remember(CaptureSearchResult o) { + memory.put(o.getDigest(),o); return o; } - public SearchResult adapt(SearchResult o) { - if(o.get(FIELDS[0]).equals(EMPTY_VALUE)) { + public CaptureSearchResult adapt(CaptureSearchResult o) { + if(o.getFile().equals(EMPTY_VALUE)) { return annotate(o); } return remember(o); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultComparator.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -26,8 +26,7 @@ import java.util.Comparator; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; /** * @@ -35,7 +34,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class SearchResultComparator implements Comparator<SearchResult> { +public class SearchResultComparator implements Comparator<CaptureSearchResult> { private boolean backwards; /** @@ -52,15 +51,15 @@ backwards = false; } - private String objectToKey(SearchResult r) { - String urlKey = r.get(WaybackConstants.RESULT_URL_KEY); - String captureDate = 
r.get(WaybackConstants.RESULT_CAPTURE_DATE); + private String objectToKey(CaptureSearchResult r) { + String urlKey = r.getUrlKey(); + String captureDate = r.getCaptureTimestamp(); return urlKey + " " + captureDate; } /* (non-Javadoc) * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ - public int compare(SearchResult o1, SearchResult o2) { + public int compare(CaptureSearchResult o1, CaptureSearchResult o2) { String k1 = objectToKey(o1); String k2 = objectToKey(o2); if(backwards) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SearchResultSource.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -26,7 +26,7 @@ import java.io.IOException; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.util.CloseableIterator; @@ -44,7 +44,7 @@ * results. * @throws ResourceIndexNotAvailableException */ - public CloseableIterator<SearchResult> getPrefixIterator(final String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixIterator(final String prefix) throws ResourceIndexNotAvailableException; /** @@ -54,14 +54,14 @@ * results. 
* @throws ResourceIndexNotAvailableException */ - public CloseableIterator<SearchResult> getPrefixReverseIterator(final String prefix) + public CloseableIterator<CaptureSearchResult> getPrefixReverseIterator(final String prefix) throws ResourceIndexNotAvailableException; /** * @param c * @throws IOException */ - public void cleanup(CloseableIterator<SearchResult> c) throws IOException; + public void cleanup(CloseableIterator<CaptureSearchResult> c) throws IOException; /** * @param c Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java 2008-07-01 23:33:35 UTC (rev 2364) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/UpdatableSearchResultSource.java 2008-07-01 23:34:52 UTC (rev 2365) @@ -28,7 +28,7 @@ import java.util.Iterator; import org.archive.wayback.UrlCanonicalizer; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; /** * @@ -37,6 +37,6 @@ * @version $Date$, $Revision$ */ public interface UpdatableSearchResultSource extends SearchResultSource { - public void addSearchResults(Iterator<SearchResult> itr, + public void addSearchResults(Iterator<CaptureSearchResult> itr, UrlCanonicalizer canonicalizer) throws IOException; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:48:38
|
Revision: 2377 http://archive-access.svn.sourceforge.net/archive-access/?rev=2377&view=rev Author: bradtofel Date: 2008-07-01 16:48:47 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult required non-trivial changes, but that was the only tangible result. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2008-07-01 23:47:50 UTC (rev 2376) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) @@ -36,8 +36,8 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; @@ -77,7 +77,7 @@ private static final String NUTCH_DIGEST = "digest"; private static final String NUTCH_PRIMARY_TYPE = "primaryType"; private static final String NUTCH_SUB_TYPE = "subType"; - private static final String NUTCH_CAPTURE_HOST = "site"; +// private static final String NUTCH_CAPTURE_HOST = "site"; private static final String NUTCH_CAPTURE_URL = "link"; private static final String NUTCH_SEARCH_RESULT_TAG = "item"; @@ -129,7 +129,7 @@ e.getMessage()); } - SearchResults results; 
+ CaptureSearchResults results; String type = wbRequest.get(WaybackConstants.REQUEST_TYPE); if(type.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || type.equals(WaybackConstants.REQUEST_URL_QUERY)) { @@ -157,21 +157,21 @@ Element e = (Element) nodes.item(i); - SearchResult result = elementToSearchResult(e); + CaptureSearchResult result = elementToSearchResult(e); results.addSearchResult(result); } Element channelElement = (Element) channel.item(0); - results.putFilter(WaybackConstants.RESULTS_FIRST_RETURNED, + results.putFilter(SearchResults.RESULTS_FIRST_RETURNED, getNodeContent(channelElement,NUTCH_FIRST_RESULT)); - results.putFilter(WaybackConstants.RESULTS_NUM_RESULTS, + results.putFilter(SearchResults.RESULTS_NUM_RESULTS, getNodeContent(channelElement,NUTCH_NUM_RESULTS)); - results.putFilter(WaybackConstants.RESULTS_NUM_RETURNED, + results.putFilter(SearchResults.RESULTS_NUM_RETURNED, getNodeContent(channelElement,NUTCH_NUM_RETURNED)); - results.putFilter(WaybackConstants.RESULTS_REQUESTED, + results.putFilter(SearchResults.RESULTS_REQUESTED, String.valueOf(wbRequest.getResultsPerPage())); results.putFilter(WaybackConstants.REQUEST_START_DATE, @@ -182,13 +182,12 @@ return results; } - private SearchResult elementToSearchResult(Element e) + private CaptureSearchResult elementToSearchResult(Element e) throws ResourceIndexNotAvailableException { - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); - result.put(WaybackConstants.RESULT_ARC_FILE, - getNodeNutchContent(e,NUTCH_ARCNAME)); + result.setFile(getNodeNutchContent(e,NUTCH_ARCNAME)); // The date in nutchwax is now named 'tstamp' and its // 17 characters rather than 14. Pass first 14 only. 
@@ -202,27 +201,21 @@ if (d.length() == 17) { d = d.substring(0, 14); } - result.put(WaybackConstants.RESULT_CAPTURE_DATE, d); + result.setCaptureTimestamp(d); //result.put(WaybackConstants.RESULT_HTTP_CODE,getNodeContent(e,"")); - result.put(WaybackConstants.RESULT_HTTP_CODE,NUTCH_DEFAULT_HTTP_CODE); - result.put(WaybackConstants.RESULT_MD5_DIGEST, - getNodeNutchContent(e,NUTCH_DIGEST)); + result.setHttpCode(NUTCH_DEFAULT_HTTP_CODE); + result.setDigest(getNodeNutchContent(e,NUTCH_DIGEST)); - result.put(WaybackConstants.RESULT_MIME_TYPE, - getNodeNutchContent(e,NUTCH_PRIMARY_TYPE) + "/" + + result.setMimeType(getNodeNutchContent(e,NUTCH_PRIMARY_TYPE) + "/" + getNodeNutchContent(e,NUTCH_SUB_TYPE)); - result.put(WaybackConstants.RESULT_OFFSET, - getNodeNutchContent(e,NUTCH_ARCOFFSET)); + result.setOffset(Long.parseLong(getNodeNutchContent(e,NUTCH_ARCOFFSET))); - result.put(WaybackConstants.RESULT_ORIG_HOST, - getNodeNutchContent(e,NUTCH_CAPTURE_HOST)); -// result.put(WaybackConstants.RESULT_REDIRECT_URL,getNodeContent(e,"")); - result.put(WaybackConstants.RESULT_REDIRECT_URL, - NUTCH_DEFAULT_REDIRECT_URL); - result.put(WaybackConstants.RESULT_URL,getNodeContent(e, - NUTCH_CAPTURE_URL)); + result.setRedirectUrl(NUTCH_DEFAULT_REDIRECT_URL); + result.setCaptureTimestamp(getNodeContent(e,NUTCH_CAPTURE_URL)); + result.setOriginalUrl(getNodeContent(e,NUTCH_CAPTURE_URL)); + result.setUrlKey(getNodeContent(e,NUTCH_CAPTURE_URL)); return result; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-07-01 23:47:50 UTC (rev 2376) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java 2008-07-01 23:48:47 UTC (rev 2377) @@ -35,9 
+35,11 @@ import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.SearchResults; +import org.archive.wayback.core.UrlSearchResult; import org.archive.wayback.core.UrlSearchResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AccessControlException; @@ -122,13 +124,13 @@ throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { - +// throw new ResourceIndexNotAvailableException("oops"); return urlToSearchResults(getRequestUrl(wbRequest), getSearchResultFilters(wbRequest)); } protected SearchResults urlToSearchResults(String requestUrl, - ObjectFilter<SearchResult> filter) + ObjectFilter<CaptureSearchResult> filter) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException { @@ -190,11 +192,11 @@ } } - protected ObjectFilter<SearchResult> getSearchResultFilters( + protected ObjectFilter<CaptureSearchResult> getSearchResultFilters( WaybackRequest wbRequest) { String searchType = wbRequest.get(WaybackConstants.REQUEST_TYPE); - ObjectFilterChain<SearchResult> filters = - new ObjectFilterChain<SearchResult>(); + ObjectFilterChain<CaptureSearchResult> filters = + new ObjectFilterChain<CaptureSearchResult>(); if (searchType.equals(WaybackConstants.REQUEST_REPLAY_QUERY) || searchType.equals(WaybackConstants.REQUEST_CLOSEST_QUERY)) { @@ -210,14 +212,14 @@ } protected SearchResults documentToSearchResults(Document document, - ObjectFilter<SearchResult> filter) { + ObjectFilter<CaptureSearchResult> filter) { SearchResults results = null; NodeList filters = getRequestFilters(document); String resultsType = getResultsType(document); 
if(resultsType.equals(WaybackConstants.RESULTS_TYPE_CAPTURE)) { - results = new CaptureSearchResults(); + results = documentToCaptureSearchResults(document,filter); } else { - results = new UrlSearchResults(); + results = documentToUrlSearchResults(document); } for(int i = 0; i < filters.getLength(); i++) { String key = filters.item(i).getNodeName(); @@ -226,11 +228,26 @@ results.putFilter(key,value); } } - + return results; + } + private UrlSearchResults documentToUrlSearchResults( + Document document) { + UrlSearchResults results = new UrlSearchResults(); NodeList xresults = getSearchResults(document); for(int i = 0; i < xresults.getLength(); i++) { Node xresult = xresults.item(i); - SearchResult result = searchElementToSearchResult(xresult); + UrlSearchResult result = searchElementToUrlSearchResult(xresult); + results.addSearchResult(result, true); + } + return results; + } + private CaptureSearchResults documentToCaptureSearchResults( + Document document, ObjectFilter<CaptureSearchResult> filter) { + CaptureSearchResults results = new CaptureSearchResults(); + NodeList xresults = getSearchResults(document); + for(int i = 0; i < xresults.getLength(); i++) { + Node xresult = xresults.item(i); + CaptureSearchResult result = searchElementToCaptureSearchResult(xresult); int ruling = ObjectFilter.FILTER_INCLUDE; if (filter != null) { @@ -245,11 +262,21 @@ } return results; } + private UrlSearchResult searchElementToUrlSearchResult(Node e) { - private SearchResult searchElementToSearchResult(Node e) { + UrlSearchResult result = new UrlSearchResult(); + addNodeDataToSearchResult(e,result); + return result; + } + private CaptureSearchResult searchElementToCaptureSearchResult(Node e) { - SearchResult result = new SearchResult(); + CaptureSearchResult result = new CaptureSearchResult(); + addNodeDataToSearchResult(e,result); + return result; + } + private void addNodeDataToSearchResult(Node e, SearchResult result) { + NodeList chitlens = e.getChildNodes(); for(int i = 0; 
i < chitlens.getLength(); i++) { String key = chitlens.item(i).getNodeName(); @@ -258,7 +285,6 @@ result.put(key,value); } } - return result; } protected NodeList getRequestFilters(Document d) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |