From: <bra...@us...> - 2010-04-02 03:28:34
|
Revision: 3020 http://archive-access.svn.sourceforge.net/archive-access/?rev=3020&view=rev Author: bradtofel Date: 2010-04-02 03:28:28 +0000 (Fri, 02 Apr 2010) Log Message: ----------- BUGFIX(unreported): AdaptedIterator implementation which converted an Iterator<CaptureSearchResult> to an Iterator<UrlSearchResult> was not returning the last record... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2010-04-02 03:19:58 UTC (rev 3019) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2010-04-02 03:28:28 UTC (rev 3020) @@ -43,7 +43,7 @@ import org.archive.wayback.exception.BadQueryException; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; -import org.archive.wayback.resourceindex.adapters.CaptureToUrlSearchResultAdapter; +import org.archive.wayback.resourceindex.adapters.CaptureToUrlSearchResultIterator; import org.archive.wayback.resourceindex.filterfactory.AccessPointCaptureFilterGroupFactory; import org.archive.wayback.resourceindex.filterfactory.CaptureFilterGroup; import org.archive.wayback.resourceindex.filterfactory.CoreCaptureFilterGroupFactory; @@ -242,7 +242,7 @@ uFilters.addFilters(window.getFilters()); CloseableIterator<UrlSearchResult> itrU = new ObjectFilterIterator<UrlSearchResult>( - CaptureToUrlSearchResultAdapter.adaptCaptureIterator(itrC), + new CaptureToUrlSearchResultIterator(itrC), uFilters); while(itrU.hasNext()) { Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java 2010-04-02 03:19:58 UTC (rev 3019) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultAdapter.java 2010-04-02 03:28:28 UTC (rev 3020) @@ -1,115 +0,0 @@ -/* CaptureToUrlSearchResultAdapter - * - * $Id$ - * - * Created on 4:45:55 PM Jun 28, 2008. - * - * Copyright (C) 2008 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.adapters; - -import java.util.HashMap; - -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.core.UrlSearchResult; -import org.archive.wayback.util.AdaptedIterator; -import org.archive.wayback.util.Adapter; -import org.archive.wayback.util.CloseableIterator; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class CaptureToUrlSearchResultAdapter - implements Adapter<CaptureSearchResult, UrlSearchResult> { - - private String currentUrl; - private String originalUrl; - private String firstCapture; - private String lastCapture; - private int numCaptures; - private HashMap<String,Object> digests; - private UrlSearchResult resultRef = null; - public CaptureToUrlSearchResultAdapter() { - - } - private UrlSearchResult makeUrlSearchResult(CaptureSearchResult result) { - currentUrl = result.getUrlKey(); - originalUrl = result.getOriginalUrl(); - firstCapture = result.getCaptureTimestamp(); - lastCapture = firstCapture; - digests = new HashMap<String,Object>(); - digests.put(result.getDigest(),null); - numCaptures = 1; - - resultRef = new UrlSearchResult(); - resultRef.setUrlKey(currentUrl); - resultRef.setOriginalUrl(originalUrl); - resultRef.setFirstCapture(firstCapture); - resultRef.setLastCapture(lastCapture); - resultRef.setNumCaptures(1); - resultRef.setNumVersions(1); - return resultRef; - } - - /* (non-Javadoc) - * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) - */ - public UrlSearchResult adapt(CaptureSearchResult c) { - String urlKey = c.getUrlKey(); - if(resultRef == null || !currentUrl.equals(urlKey)) { - return makeUrlSearchResult(c); - } - - // same url -- accumulate into the last one we returned: - String captureDate = c.getCaptureTimestamp(); - if(captureDate.compareTo(firstCapture) < 0) { - firstCapture = captureDate; - resultRef.setFirstCapture(firstCapture); - } - if(captureDate.compareTo(lastCapture) > 0) { - lastCapture = captureDate; - resultRef.setLastCapture(lastCapture); - } - numCaptures++; - digests.put(c.getDigest(), null); - resultRef.setNumCaptures(numCaptures); - resultRef.setNumVersions(digests.size()); - return null; - } - public static CloseableIterator<UrlSearchResult> adaptCaptureIterator( - CloseableIterator<CaptureSearchResult> itr) { - - // HACKHACK: this is pretty lame. We return an UrlSearchResult the - // first time we see a new urlKey, and cache a reference to the returned - // UrlSearchResult, updating it as we see subsequent CaptureSearchResult - // objects with the same urlKey. - // This means that users of the returned UrlSearchResult need to wait - // until they've got the *next* returned UrlSearchResult before using - // the *previous* UrlSearchResult. - // At the moment, this all happens inside a LocalResourceIndex, so - // none of the UrlSearchResult objects should be seen/used in any - // significant way before they've all be accumulated into an - // UrlSearchResults object.. - return new AdaptedIterator<CaptureSearchResult,UrlSearchResult>(itr, - new CaptureToUrlSearchResultAdapter()); - } -} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java 2010-04-02 03:28:28 UTC (rev 3020) @@ -0,0 +1,138 @@ +/* CaptureToUrlSearchResultIterator + * + * $Id$: + * + * Created on Mar 31, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.adapters; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.log4j.Logger; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.UrlSearchResult; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.PeekableIterator; + +/** + * @author brad + * + */ +public class CaptureToUrlSearchResultIterator implements CloseableIterator<UrlSearchResult> { + private static final Logger LOGGER = Logger.getLogger( + CaptureToUrlSearchResultIterator.class.getName()); + private PeekableIterator<CaptureSearchResult> peek = null; + UrlSearchResult cachedNext = null; + /** + * @param itr possibly closeable iterator of CaptureSearchResult objects + */ + public CaptureToUrlSearchResultIterator(Iterator<CaptureSearchResult> itr) { + peek = new PeekableIterator<CaptureSearchResult>(itr); + } + /* (non-Javadoc) + * @see java.util.Iterator#hasNext() + */ + public boolean hasNext() { + createNext(); + return (cachedNext != null); + } + + private void createNext() { + if(cachedNext == null) { + if(peek.hasNext()) { + // populate + CaptureSearchResult captureResult = peek.next(); + String currentKey = captureResult.getUrlKey(); + String originalUrl = captureResult.getOriginalUrl(); + String firstCapture = captureResult.getCaptureTimestamp(); + LOGGER.info("Creating new UrlResult:" + currentKey + " " + + firstCapture); + String lastCapture = firstCapture; + HashMap<String,Object> digests = new HashMap<String,Object>(); + digests.put(captureResult.getDigest(),null); + int numCaptures = 1; + + cachedNext = new UrlSearchResult(); + cachedNext.setUrlKey(currentKey); + cachedNext.setOriginalUrl(originalUrl); + + // now rip through the rest until we find either the last + // in the iterator, or the first having a different urlKey: + while((captureResult = peek.peekNext()) != null) { + String urlKey = captureResult.getUrlKey(); + if(currentKey.equals(urlKey)) { + // remove from iterator, and accumulate: + peek.next(); + numCaptures++; + digests.put(captureResult.getDigest(), null); + + String captureTS = captureResult.getCaptureTimestamp(); + if(captureTS.compareTo(firstCapture) < 0) { + firstCapture = captureTS; + } + if(captureTS.compareTo(lastCapture) > 0) { + lastCapture = captureTS; + } + + } else { + // all done. leave the next result and stop processing: + LOGGER.info("Hit next urlKey. Cur("+currentKey+") new(" + + urlKey + ")"); + break; + } + } + cachedNext.setFirstCapture(firstCapture); + cachedNext.setLastCapture(lastCapture); + cachedNext.setNumCaptures(numCaptures); + cachedNext.setNumVersions(digests.size()); + } + } + } + /* (non-Javadoc) + * @see java.util.Iterator#next() + */ + public UrlSearchResult next() { + if(cachedNext == null) { + throw new NoSuchElementException("use hasNext!"); + } + UrlSearchResult tmp = cachedNext; + cachedNext = null; + return tmp; + } + + /* (non-Javadoc) + * @see java.util.Iterator#remove() + */ + public void remove() { + throw new UnsupportedOperationException(); + } + + /* (non-Javadoc) + * @see java.io.Closeable#close() + */ + public void close() throws IOException { + peek.close(); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/adapters/CaptureToUrlSearchResultIterator.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |