From: <bra...@us...> - 2007-07-25 00:27:48
|
Revision: 1866 http://archive-access.svn.sourceforge.net/archive-access/?rev=1866&view=rev Author: bradtofel Date: 2007-07-24 17:27:51 -0700 (Tue, 24 Jul 2007) Log Message: ----------- REFACTOR: removed all references to PropertyConfigurable interface Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2007-07-25 00:26:37 UTC (rev 1865) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2007-07-25 00:27:51 UTC (rev 1866) @@ -28,13 +28,11 @@ import java.net.URL; import java.text.ParseException; import java.util.Date; -import java.util.Properties; import java.util.logging.Logger; import org.apache.commons.httpclient.URIException; import org.archive.io.arc.ARCLocation; import org.archive.io.arc.ARCRecord; -import org.archive.wayback.PropertyConfigurable; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; @@ -42,7 +40,6 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; @@ -55,7 +52,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class LiveWebCache implements PropertyConfigurable { +public class LiveWebCache { private static final Logger LOGGER = Logger.getLogger( LiveWebCache.class.getName()); @@ -65,18 +62,6 @@ private LiveWebLocalResourceIndex index = null; static UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - /* (non-Javadoc) - * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) - */ - public void init(Properties p) throws ConfigurationException { - index = new LiveWebLocalResourceIndex(); - index.init(p); - cacher = new URLCacher(); - cacher.init(p); - arcCacheDir = new ARCCacheDirectory(); - arcCacheDir.init(p); - } - /** * closes all resources (currently unused...) */ @@ -291,4 +276,60 @@ } return resource; } + + /** + * @return the maxFailedCacheMS + */ + public long getMaxFailedCacheMS() { + return maxFailedCacheMS; + } + + /** + * @param maxFailedCacheMS the maxFailedCacheMS to set + */ + public void setMaxFailedCacheMS(long maxFailedCacheMS) { + this.maxFailedCacheMS = maxFailedCacheMS; + } + + /** + * @return the arcCacheDir + */ + public ARCCacheDirectory getArcCacheDir() { + return arcCacheDir; + } + + /** + * @param arcCacheDir the arcCacheDir to set + */ + public void setArcCacheDir(ARCCacheDirectory arcCacheDir) { + this.arcCacheDir = arcCacheDir; + } + + /** + * @return the cacher + */ + public URLCacher getCacher() { + return cacher; + } + + /** + * @param cacher the cacher to set + */ + public void setCacher(URLCacher cacher) { + this.cacher = cacher; + } + + /** + * @return the index + */ + public LiveWebLocalResourceIndex getIndex() { + return index; + } + + /** + * @param index the index to set + */ + public void setIndex(LiveWebLocalResourceIndex index) { + this.index = index; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2007-07-25 00:26:37 UTC (rev 1865) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2007-07-25 00:27:51 UTC (rev 1866) @@ -25,12 +25,9 @@ package org.archive.wayback.liveweb; import java.util.ArrayList; -import java.util.Properties; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.resourceindex.LocalResourceIndex; -import org.archive.wayback.resourceindex.SearchResultSourceFactory; import org.archive.wayback.resourceindex.bdb.BDBIndex; import org.archive.wayback.resourceindex.indexer.SearchResultToBDBRecordAdapter; import org.archive.wayback.util.AdaptedIterator; @@ -43,45 +40,12 @@ * @version $Date$, $Revision$ */ public class LiveWebLocalResourceIndex extends LocalResourceIndex { - private final static String LW_DB_DIR = "liveweb.dbdir"; - private final static String LW_DB_NAME = "liveweb.dbname"; - - private String getProp(Properties p, String key) - throws ConfigurationException { - - if(p.containsKey(key)) { - String v = p.getProperty(key); - if(v == null || v.length() < 1) { - throw new ConfigurationException("Empty configuration " + key); - } - return v; - } else { - throw new ConfigurationException("Missing configuration " + key); - } - - } - public void init(Properties p) throws ConfigurationException { - // use alternate Properties, to differentiate config from normal - // ResourceIndex - Properties newP = new Properties(); - - newP.setProperty(SearchResultSourceFactory.SOURCE_CLASS, - SearchResultSourceFactory.SOURCE_CLASS_BDB); - - newP.setProperty(SearchResultSourceFactory.INDEX_PATH, - getProp(p,LW_DB_DIR)); - - newP.setProperty(SearchResultSourceFactory.DB_NAME, - getProp(p,LW_DB_NAME)); - - super.init(newP); - } - /** * Add a single SearchResult to the index. * @param result */ + @SuppressWarnings("unchecked") public void addSearchResult(SearchResult result) { ArrayList<SearchResult> l = new ArrayList<SearchResult>(); l.add(result); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2007-07-25 00:26:37 UTC (rev 1865) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2007-07-25 00:27:51 UTC (rev 1866) @@ -52,9 +52,6 @@ import org.archive.io.arc.ARCLocation; import org.archive.io.arc.ARCWriter; import org.archive.net.LaxURI; -import org.archive.wayback.PropertyConfigurable; -import org.archive.wayback.core.PropertyConfiguration; -import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.exception.LiveDocumentNotAvailableException; /** @@ -67,13 +64,14 @@ * @author brad * @version $Date$, $Revision$ */ -public class URLCacher implements PropertyConfigurable { +public class URLCacher { private static final Logger LOGGER = Logger.getLogger( URLCacher.class.getName()); private static final String CACHE_PATH = "liveweb.tmp.dir"; protected File tmpDir = null; + @SuppressWarnings("unchecked") private final ThreadLocal tl = new ThreadLocal() { protected synchronized Object initialValue() { HttpClient http = new HttpClient(); @@ -88,16 +86,6 @@ return (HttpClient) tl.get(); } - /* (non-Javadoc) - * @see org.archive.wayback.PropertyConfigurable#init(java.util.Properties) - */ - public void init(Properties p) throws ConfigurationException { - PropertyConfiguration pc = new PropertyConfiguration(p); - tmpDir = pc.getDir(CACHE_PATH,true); - LOGGER.info("URLCacher storing temp files in " + - tmpDir.getAbsolutePath()); - } - private File getTmpFile() { String tmpName; File tmpFile; @@ -241,13 +229,13 @@ URLCacher uc = new URLCacher(); ARCCacheDirectory cache = new ARCCacheDirectory(); - try { - cache.init(p); - uc.init(p); - } catch (ConfigurationException e) { - e.printStackTrace(); - System.exit(1); - } +// try { +//// cache.init(p); +//// uc.init(p); +// } catch (ConfigurationException e) { +// e.printStackTrace(); +// System.exit(1); +// } for(int k = 1; k < args.length; k++) { try { url = new URL(args[k]); @@ -433,4 +421,21 @@ } } + /** + * @return the tmpDir + */ + public String getTmpDir() { + if(tmpDir == null) { + return null; + } + return tmpDir.getAbsolutePath(); + } + + /** + * @param tmpDir the tmpDir to set + */ + public void setTmpDir(String tmpDir) { + this.tmpDir = new File(tmpDir); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-11-29 20:29:22
|
Revision: 2094 http://archive-access.svn.sourceforge.net/archive-access/?rev=2094&view=rev Author: bradtofel Date: 2007-11-29 12:29:26 -0800 (Thu, 29 Nov 2007) Log Message: ----------- INTERFACE: changed to use new ArcResource Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2007-11-28 03:15:42 UTC (rev 2093) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2007-11-29 20:29:26 UTC (rev 2094) @@ -30,15 +30,13 @@ import java.util.List; import java.util.logging.Logger; -import org.archive.io.ArchiveRecord; import org.archive.io.WriterPoolSettings; import org.archive.io.arc.ARCConstants; -import org.archive.io.arc.ARCReader; -import org.archive.io.arc.ARCReaderFactory; -import org.archive.io.arc.ARCRecord; import org.archive.io.arc.ARCWriter; import org.archive.io.arc.ARCWriterPool; import org.archive.wayback.core.Resource; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.resourcestore.ResourceFactory; import org.archive.wayback.util.DirMaker; /** @@ -119,7 +117,6 @@ * @throws IOException */ public Resource getResource(String path, long offset) throws IOException { - Resource resource = null; File arc = new File(path); if(!arc.exists()) { String base = arc.getName(); @@ -132,23 +129,13 @@ } } } - LOGGER.info("Retrieving record at " + offset + " in " + - arc.getAbsolutePath()); - ARCReader reader = null; + arc.getAbsolutePath()); try { - reader = ARCReaderFactory.get(arc,true,offset); - } catch (IOException e) { - throw new RuntimeException(e); + return ResourceFactory.getResource(arc, offset); + } catch (ResourceNotAvailableException e1) { + throw new IOException(e1.getMessage()); } - - ArchiveRecord aRec = reader.get(offset); - if(!(aRec instanceof ARCRecord)) { - throw new IOException("Not ARCRecord..."); - } - ARCRecord rec = (ARCRecord) aRec; - resource = new Resource(rec,reader); - return resource; } private WriterPoolSettings getSettings(final boolean isCompressed, Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2007-11-28 03:15:42 UTC (rev 2093) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2007-11-29 20:29:26 UTC (rev 2094) @@ -43,6 +43,7 @@ import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; +import org.archive.wayback.resourcestore.ArcResource; import org.archive.wayback.util.Adapter; import org.archive.wayback.util.UrlCanonicalizer; @@ -203,15 +204,18 @@ "ARC(" + name + ") at (" + offset + ")"); resource = arcCacheDir.getResource(name, offset); // add the result to the index: - ARCRecord record = (ARCRecord) resource.getArcRecord(); + if(resource instanceof ArcResource) { + ArcResource aResource = (ArcResource) resource; + ARCRecord record = (ARCRecord) aResource.getArcRecord(); - SearchResult result = adapter.adapt(record); - index.addSearchResult(result); - LOGGER.info("Added URL(" + url.toString() + ") in " + - "ARC(" + name + ") at (" + offset + ") to LiveIndex"); + SearchResult result = adapter.adapt(record); + index.addSearchResult(result); + LOGGER.info("Added URL(" + url.toString() + ") in " + + "ARC(" + name + ") at (" + offset + ") to LiveIndex"); - // we just read thru the doc in order to index it. Reset: - resource = arcCacheDir.getResource(name, offset); + // we just read thru the doc in order to index it. Reset: + resource = arcCacheDir.getResource(name, offset); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:27:09
|
Revision: 2359 http://archive-access.svn.sourceforge.net/archive-access/?rev=2359&view=rev Author: bradtofel Date: 2008-07-01 16:27:14 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2008-07-01 23:26:31 UTC (rev 2358) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2008-07-01 23:27:14 UTC (rev 2359) @@ -36,7 +36,7 @@ import org.archive.io.arc.ARCWriterPool; import org.archive.wayback.core.Resource; import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.resourcestore.ResourceFactory; +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; import org.archive.wayback.util.DirMaker; /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-07-01 23:26:31 UTC (rev 2358) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-07-01 23:27:14 UTC (rev 2359) @@ -34,17 +34,17 @@ import org.archive.io.arc.ARCRecord; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.LiveDocumentNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; -import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; -import org.archive.wayback.resourcestore.ArcResource; +import org.archive.wayback.resourcestore.indexer.ARCRecordToSearchResultAdapter; +import org.archive.wayback.resourcestore.resourcefile.ArcResource; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; /** @@ -100,8 +100,8 @@ return req; } - private boolean isForgedFailRecentEnough(SearchResult result) { - String captureDate = result.get(WaybackConstants.RESULT_CAPTURE_DATE); + private boolean isForgedFailRecentEnough(CaptureSearchResult result) { + String captureDate = result.getCaptureTimestamp(); Timestamp t = new Timestamp(captureDate); long maxAge = System.currentTimeMillis() - maxFailedCacheMS; long failAge = t.getDate().getTime(); @@ -111,27 +111,25 @@ return false; } - private boolean isForgedFailedSearchResult(SearchResult result) { - String arcFile = result.get(WaybackConstants.RESULT_ARC_FILE); + private boolean isForgedFailedSearchResult(CaptureSearchResult result) { + String arcFile = result.getFile(); return arcFile.equals("-"); } - private SearchResult forgeFailedSearchResult(URL url) { - SearchResult result = new SearchResult(); + private CaptureSearchResult forgeFailedSearchResult(URL url) { + CaptureSearchResult result = new CaptureSearchResult(); - result.put(WaybackConstants.RESULT_ARC_FILE, "-"); - result.put(WaybackConstants.RESULT_OFFSET, "0"); + result.setFile("-"); + result.setOffset(0); - result.put(WaybackConstants.RESULT_HTTP_CODE, "0"); + result.setHttpCode("0"); - result.put(WaybackConstants.RESULT_MD5_DIGEST, "-"); - result.put(WaybackConstants.RESULT_MIME_TYPE, "-"); - result.put(WaybackConstants.RESULT_CAPTURE_DATE, - Timestamp.currentTimestamp().getDateStr()); + result.setDigest("-"); + result.setMimeType("-"); + result.setCaptureDate(new Date()); - result.put(WaybackConstants.RESULT_ORIG_HOST, url.getHost()); - result.put(WaybackConstants.RESULT_REDIRECT_URL, "-"); - result.put(WaybackConstants.RESULT_URL, url.toString()); + result.setOriginalUrl(url.toString()); + result.setRedirectUrl("-"); String indexUrl; try { @@ -141,7 +139,7 @@ e.printStackTrace(); indexUrl = url.toString(); } - result.put(WaybackConstants.RESULT_URL_KEY, indexUrl); + result.setUrlKey(indexUrl); return result; } @@ -167,7 +165,7 @@ e.printStackTrace(); throw new IOException(e.getMessage()); } - SearchResult result = results.getClosest(wbRequest); + CaptureSearchResult result = results.getClosest(wbRequest); if(result != null) { if(isForgedFailedSearchResult(result)) { if(isForgedFailRecentEnough(result)) { @@ -178,9 +176,8 @@ throw new ResourceNotInArchiveException("Nope"); } } - String name = (String) result.get(WaybackConstants.RESULT_ARC_FILE); - long offset = Long.parseLong( - (String) result.get(WaybackConstants.RESULT_OFFSET)); + String name = result.getFile(); + long offset = result.getOffset(); resource = arcCacheDir.getResource(name, offset); } return resource; @@ -197,7 +194,7 @@ location = cacher.cache(arcCacheDir, url.toString()); } catch(LiveDocumentNotAvailableException e) { // record the failure, so we can fail early next time: - SearchResult result = forgeFailedSearchResult(url); + CaptureSearchResult result = forgeFailedSearchResult(url); index.addSearchResult(result); LOGGER.info("Added FAIL-URL(" + url.toString() + ") to LiveIndex"); throw e; @@ -213,7 +210,7 @@ ArcResource aResource = (ArcResource) resource; ARCRecord record = (ARCRecord) aResource.getArcRecord(); - SearchResult result = adapter.adapt(record); + CaptureSearchResult result = adapter.adapt(record); index.addSearchResult(result); LOGGER.info("Added URL(" + url.toString() + ") in " + "ARC(" + name + ") at (" + offset + ") to LiveIndex"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2008-07-01 23:26:31 UTC (rev 2358) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2008-07-01 23:27:14 UTC (rev 2359) @@ -27,7 +27,7 @@ import java.io.IOException; import java.util.ArrayList; -import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.LocalResourceIndex; /** @@ -46,10 +46,10 @@ * @throws UnsupportedOperationException */ @SuppressWarnings("unchecked") - public void addSearchResult(SearchResult result) + public void addSearchResult(CaptureSearchResult result) throws UnsupportedOperationException, IOException { - ArrayList<SearchResult> l = new ArrayList<SearchResult>(); + ArrayList<CaptureSearchResult> l = new ArrayList<CaptureSearchResult>(); l.add(result); addSearchResults(l.iterator()); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |