Revision: 2642 http://archive-access.svn.sourceforge.net/archive-access/?rev=2642&view=rev Author: bradtofel Date: 2008-11-07 22:45:08 +0000 (Fri, 07 Nov 2008) Log Message: ----------- FEATURE: now adds a SchemeMatchFilter if WaybackRequest specifies it is needed. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-11-07 22:41:50 UTC (rev 2641) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-11-07 22:45:08 UTC (rev 2642) @@ -51,6 +51,7 @@ import org.archive.wayback.resourceindex.filters.EndDateFilter; import org.archive.wayback.resourceindex.filters.GuardRailFilter; import org.archive.wayback.resourceindex.filters.HostMatchFilter; +import org.archive.wayback.resourceindex.filters.SchemeMatchFilter; import org.archive.wayback.resourceindex.filters.SelfRedirectFilter; import org.archive.wayback.resourceindex.filters.UrlMatchFilter; import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter; @@ -63,6 +64,7 @@ import org.archive.wayback.util.ObjectFilterIterator; import org.archive.wayback.util.Timestamp; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.util.url.UrlOperations; /** * @@ -378,6 +380,10 @@ filter.addFilter(exactHost); } + if(request.isExactScheme()) { + filter.addFilter(new SchemeMatchFilter( + UrlOperations.urlToScheme(request.getRequestUrl()))); + } // count how many results got to the ExclusionFilter: filter.addFilter(preExclusionCounter); @@ -417,6 +423,7 @@ } } } + private static 
HostMatchFilter getExactHostFilter(WaybackRequest r) { HostMatchFilter filter = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2680 http://archive-access.svn.sourceforge.net/archive-access/?rev=2680&view=rev Author: bradtofel Date: 2009-01-29 23:52:10 +0000 (Thu, 29 Jan 2009) Log Message: ----------- BUGFIX(ACC-58): was not adding DateRangeFilter for UrlPrefix queries. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-12-18 19:12:47 UTC (rev 2679) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2009-01-29 23:52:10 UTC (rev 2680) @@ -372,6 +372,7 @@ filter.addFilter(drFilter); } else if(type == TYPE_URL) { filter.addFilter(new UrlPrefixMatchFilter(keyUrl)); + filter.addFilter(drFilter); } else { throw new BadQueryException("Unknown type"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2734 http://archive-access.svn.sourceforge.net/archive-access/?rev=2734&view=rev Author: bradtofel Date: 2009-06-09 21:18:20 +0000 (Tue, 09 Jun 2009) Log Message: ----------- FEATURE: added ConditionalGET annotation capability. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2009-06-09 21:12:27 UTC (rev 2733) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2009-06-09 21:18:20 UTC (rev 2734) @@ -28,8 +28,6 @@ import java.util.Iterator; import org.apache.commons.httpclient.URIException; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; import org.archive.wayback.ResourceIndex; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; @@ -43,12 +41,12 @@ import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.resourceindex.adapters.ConditionalGetAnnotationSearchResultAdapter; import org.archive.wayback.resourceindex.adapters.CaptureToUrlSearchResultAdapter; import org.archive.wayback.resourceindex.adapters.DeduplicationSearchResultAnnotationAdapter; import org.archive.wayback.resourceindex.filters.CounterFilter; import org.archive.wayback.resourceindex.filters.DateRangeFilter; import org.archive.wayback.resourceindex.filters.DuplicateRecordFilter; -import org.archive.wayback.resourceindex.filters.EndDateFilter; import 
org.archive.wayback.resourceindex.filters.GuardRailFilter; import org.archive.wayback.resourceindex.filters.HostMatchFilter; import org.archive.wayback.resourceindex.filters.SchemeMatchFilter; @@ -101,7 +99,10 @@ CloseableIterator<CaptureSearchResult> captures = source.getPrefixIterator(k); if(dedupeRecords) { + // hack hack!!! captures = new AdaptedIterator<CaptureSearchResult, CaptureSearchResult> + (captures, new ConditionalGetAnnotationSearchResultAdapter()); + captures = new AdaptedIterator<CaptureSearchResult, CaptureSearchResult> (captures, new DeduplicationSearchResultAnnotationAdapter()); } return captures; @@ -126,14 +127,15 @@ CaptureSearchResults results = new CaptureSearchResults(); CaptureQueryFilterState filterState = - new CaptureQueryFilterState(wbRequest,canonicalizer, type, filter); + new CaptureQueryFilterState(wbRequest, canonicalizer, type, + getUserFilters(wbRequest)); String keyUrl = filterState.getKeyUrl(); CloseableIterator<CaptureSearchResult> itr = getCaptureIterator(keyUrl); // set up the common Filters: ObjectFilter<CaptureSearchResult> filter = filterState.getFilter(); itr = new ObjectFilterIterator<CaptureSearchResult>(itr,filter); - + // Windowing: WindowFilterState<CaptureSearchResult> window = new WindowFilterState<CaptureSearchResult>(wbRequest); @@ -154,6 +156,7 @@ cleanupIterator(itr); return results; } + public UrlSearchResults doUrlQuery(WaybackRequest wbRequest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, @@ -163,7 +166,7 @@ CaptureQueryFilterState filterState = new CaptureQueryFilterState(wbRequest,canonicalizer, - CaptureQueryFilterState.TYPE_URL, filter); + CaptureQueryFilterState.TYPE_URL, getUserFilters(wbRequest)); String keyUrl = filterState.getKeyUrl(); CloseableIterator<CaptureSearchResult> citr = getCaptureIterator(keyUrl); @@ -300,6 +303,27 @@ this.filter = filter; } + public ObjectFilterChain<CaptureSearchResult> getUserFilters(WaybackRequest request) { + 
ObjectFilterChain<CaptureSearchResult> userFilters = + new ObjectFilterChain<CaptureSearchResult>(); + + // has the user asked for only results on the exact host specified? + if(request.isExactHost()) { + userFilters.addFilter(new HostMatchFilter( + UrlOperations.urlToHost(request.getRequestUrl()))); + } + + if(request.isExactScheme()) { + userFilters.addFilter(new SchemeMatchFilter( + UrlOperations.urlToScheme(request.getRequestUrl()))); + } + if(filter != null) { + userFilters.addFilter(filter); + } + + return userFilters; + } + private class CaptureQueryFilterState { public final static int TYPE_REPLAY = 0; public final static int TYPE_CAPTURE = 1; @@ -315,7 +339,7 @@ public CaptureQueryFilterState(WaybackRequest request, UrlCanonicalizer canonicalizer, int type, - ObjectFilter<CaptureSearchResult> genericFilter) + ObjectFilterChain<CaptureSearchResult> userFilter) throws BadQueryException { String searchUrl = request.getRequestUrl(); @@ -346,12 +370,6 @@ preExclusionCounter = new CounterFilter(); DateRangeFilter drFilter = new DateRangeFilter(startDate,endDate); - if(genericFilter != null) { - filter.addFilter(genericFilter); - } - // has the user asked for only results on the exact host specified? 
- ObjectFilter<CaptureSearchResult> exactHost = - getExactHostFilter(request); // checks an exclusion service for every matching record ObjectFilter<CaptureSearchResult> exclusion = request.getExclusionFilter(); @@ -363,7 +381,7 @@ if(type == TYPE_REPLAY) { filter.addFilter(new UrlMatchFilter(keyUrl)); - filter.addFilter(new EndDateFilter(endDate)); + filter.addFilter(drFilter); SelfRedirectFilter selfRedirectFilter= new SelfRedirectFilter(); selfRedirectFilter.setCanonicalizer(canonicalizer); filter.addFilter(selfRedirectFilter); @@ -377,14 +395,10 @@ throw new BadQueryException("Unknown type"); } - if(exactHost != null) { - filter.addFilter(exactHost); + if(userFilter != null) { + filter.addFilters(userFilter.getFilters()); } - if(request.isExactScheme()) { - filter.addFilter(new SchemeMatchFilter( - UrlOperations.urlToScheme(request.getRequestUrl()))); - } // count how many results got to the ExclusionFilter: filter.addFilter(preExclusionCounter); @@ -425,26 +439,6 @@ } } - private static HostMatchFilter getExactHostFilter(WaybackRequest r) { - - HostMatchFilter filter = null; - if(r.isExactHost()) { - - String searchUrl = r.getRequestUrl(); - try { - - UURI searchURI = UURIFactory.getInstance(searchUrl); - String exactHost = searchURI.getHost(); - filter = new HostMatchFilter(exactHost); - - } catch (URIException e) { - // Really, this isn't gonna happen, we've already canonicalized - // it... should really optimize and do that just once. - e.printStackTrace(); - } - } - return filter; - } private class WindowFilterState<T> { int startResult; // calculated based on hits/page * pagenum int resultsPerPage; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3401 http://archive-access.svn.sourceforge.net/archive-access/?rev=3401&view=rev Author: bradtofel Date: 2011-02-06 14:42:47 +0000 (Sun, 06 Feb 2011) Log Message: ----------- BUGFIX: moved AccessPoint query filters closer to the front - too brittle this way. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-02-06 14:41:45 UTC (rev 3400) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-02-06 14:42:47 UTC (rev 3401) @@ -117,9 +117,9 @@ canonicalizer = new AggressiveUrlCanonicalizer(); fgFactories = new ArrayList<FilterGroupFactory>(); fgFactories.add(new CoreCaptureFilterGroupFactory()); + fgFactories.add(new AccessPointCaptureFilterGroupFactory()); + fgFactories.add(new ExclusionCaptureFilterGroupFactory()); fgFactories.add(new QueryCaptureFilterGroupFactory()); - fgFactories.add(new ExclusionCaptureFilterGroupFactory()); - fgFactories.add(new AccessPointCaptureFilterGroupFactory()); } private void cleanupIterator(CloseableIterator<? extends SearchResult> itr) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3552 http://archive-access.svn.sourceforge.net/archive-access/?rev=3552&view=rev Author: bradtofel Date: 2011-10-26 16:15:53 +0000 (Wed, 26 Oct 2011) Log Message: ----------- BUGFIX: moved QueryFilterGroup above AccessPointFilterGroup - we really need that URL prefix to happen before anything that could potentially skip a lot of records. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-10-25 17:54:42 UTC (rev 3551) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-10-26 16:15:53 UTC (rev 3552) @@ -118,8 +118,8 @@ canonicalizer = new AggressiveUrlCanonicalizer(); fgFactories = new ArrayList<FilterGroupFactory>(); fgFactories.add(new CoreCaptureFilterGroupFactory()); + fgFactories.add(new QueryCaptureFilterGroupFactory()); fgFactories.add(new AccessPointCaptureFilterGroupFactory()); - fgFactories.add(new QueryCaptureFilterGroupFactory()); fgFactories.add(new ExclusionCaptureFilterGroupFactory()); fgFactories.add(new ClosestTrackingCaptureFilterGroupFactory()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3573 http://archive-access.svn.sourceforge.net/archive-access/?rev=3573&view=rev Author: ikreymer Date: 2011-11-29 04:40:21 +0000 (Tue, 29 Nov 2011) Log Message: ----------- BUGFIX: Rearranging filters again: it seems QueryCaptureFilterGroup needs to come before AccessPointCaptureFilterGroup, which needs to come before CoreCaptureFilterGroup. May need to revisit this. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-11-29 03:45:09 UTC (rev 3572) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2011-11-29 04:40:21 UTC (rev 3573) @@ -117,9 +117,9 @@ public LocalResourceIndex() { canonicalizer = new AggressiveUrlCanonicalizer(); fgFactories = new ArrayList<FilterGroupFactory>(); + fgFactories.add(new QueryCaptureFilterGroupFactory()); fgFactories.add(new AccessPointCaptureFilterGroupFactory()); fgFactories.add(new CoreCaptureFilterGroupFactory()); - fgFactories.add(new QueryCaptureFilterGroupFactory()); fgFactories.add(new ExclusionCaptureFilterGroupFactory()); fgFactories.add(new ClosestTrackingCaptureFilterGroupFactory()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3625 http://archive-access.svn.sourceforge.net/archive-access/?rev=3625&view=rev Author: ikreymer Date: 2012-03-06 01:25:13 +0000 (Tue, 06 Mar 2012) Log Message: ----------- FIX: Make LocalResourceIndex more subclass-friendly by making factories and filter setup protected instead of private Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2012-03-01 08:47:49 UTC (rev 3624) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2012-03-06 01:25:13 UTC (rev 3625) @@ -113,7 +113,7 @@ private ObjectFilter<CaptureSearchResult> filter = null; - List<FilterGroupFactory> fgFactories = null; + protected List<FilterGroupFactory> fgFactories = null; public LocalResourceIndex() { canonicalizer = new AggressiveUrlCanonicalizer(); @@ -137,7 +137,7 @@ } } - private List<CaptureFilterGroup> getRequestFilterGroups(WaybackRequest r) + protected List<CaptureFilterGroup> getRequestFilterGroups(WaybackRequest r) throws BadQueryException { ArrayList<CaptureFilterGroup> groups = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |