From: <bra...@us...> - 2008-08-09 01:20:49
|
Revision: 2531
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2531&view=rev
Author:   bradtofel
Date:     2008-08-09 01:20:56 +0000 (Sat, 09 Aug 2008)

Log Message:
-----------
RENAME: Http11ResourceStore => SimpleResourceStore (now will use prefix as URL or local path prefix)
RENAME: LocalResourceFileResourceStore => LocationDBResourceStore
REMOVE: old LocalResourceStore, AutoIndexThread
FEATURE: added method to ResourceFactory: getResource(String,long) which guesses if the String argument is an URL or a path, and calls the correct getResource() method.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java

Added Paths:
-----------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/SimpleResourceStore.java

Removed Paths:
-------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java

Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java 2008-08-08 23:35:06 UTC (rev 2530)
+++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/AutoIndexThread.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -1,219 +0,0 @@ -package org.archive.wayback.resourcestore; - -import java.io.File; -import java.io.IOException; -import java.net.MalformedURLException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.resourceindex.updater.IndexClient; -import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.util.DirMaker; - -/** - * Thread that repeatedly notices new files in the LocalResourceStore, indexes - * those files, and hands them off to a ResourceIndex via an IndexClient - * - * @author brad - * @version $Date$, $Revision$ - */ -public class AutoIndexThread extends Thread { - private static final Logger LOGGER = - Logger.getLogger(AutoIndexThread.class.getName()); - - private final static int DEFAULT_RUN_INTERVAL_MS = 10000; - private LocalResourceStore store = null; - private File workDir = null; - private File queuedDir = null; - private int runInterval = DEFAULT_RUN_INTERVAL_MS; - private IndexClient indexClient = null; - - /** - * @param store - * @param runInterval - */ - public AutoIndexThread() { - super("AutoARCIndexThread"); - super.setDaemon(true); - } - - public void run() { - LOGGER.info("AutoIndexThread is alive."); - int sleepInterval = runInterval; - if(store == null) { - throw new RuntimeException("No LocalResourceStore set"); - } - while (true) { - try { - int numIndexed = indexNewArcs(); - if (numIndexed == 0) { - sleep(sleepInterval); - sleepInterval += runInterval; - } else { - sleepInterval = runInterval; - } - } catch (InterruptedException e) { - e.printStackTrace(); - return; - } - } - } - - /** - * Scan for new ARC files, and index any new files discovered. 
- * - * There are 3 main steps, which could be broken into separate threads: - * 1) detect new ARCs - * 2) create CDX files for each new ARC - * 3) upload CDX files to target (or rename to local "incoming" directory) - * - * for now these are sequential. - * - * @return number of ARC files indexed - */ - public int indexNewArcs() { - int numIndexed = 0; - try { - queueNewArcsForIndex(); - } catch (IOException e) { - e.printStackTrace(); - } - try { - numIndexed = indexArcs(10); - } catch (MalformedURLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return numIndexed; - } - /** - * Find any new ARC files and queue them for indexing. - * @throws IOException - */ - public void queueNewArcsForIndex() throws IOException { - - // build a HashMap of what has been queued already: - HashMap<String,String> queued = new HashMap<String, String>(); - String entries[] = queuedDir.list(); - if(entries != null) { - for (int i = 0; i < entries.length; i++) { - queued.put(entries[i], "i"); - } - } - // now scan thru arcDir, and make a flag file for anything that was not - // already there: - Iterator<String> files = store.fileNamesIterator(); - if(files != null) { - while(files.hasNext()) { - String fileName = files.next(); - if(!queued.containsKey(fileName)) { - File newQueuedFile = new File(queuedDir,fileName); - File newToBeIndexedFile = new File(workDir,fileName); - newToBeIndexedFile.createNewFile(); - newQueuedFile.createNewFile(); - } - } - } - } - - private String fileNameToBase(final String fileName) { - return fileName; - } - - /** - * Index up to 'max' ARC/WARC files queued for indexing, queueing the - * resulting CDX files for merging with the BDBIndex. 
- * - * @param indexer - * @param max maximum number to index in this method call, 0 for unlimited - * @return int number of ARC/WARC files indexed - * @throws MalformedURLException - * @throws IOException - */ - public int indexArcs(int max) - throws MalformedURLException, IOException { - - int numIndexed = 0; - String toBeIndexed[] = workDir.list(); - - if (toBeIndexed != null) { - for (int i = 0; i < toBeIndexed.length; i++) { - String fileName = toBeIndexed[i]; - File file = store.getLocalFile(fileName); - if(file != null) { - File workFlagFile = new File(workDir,fileName); - String cdxBase = fileNameToBase(fileName); - - try { - - LOGGER.info("Indexing " + file.getAbsolutePath()); - CloseableIterator<CaptureSearchResult> itr = store.indexFile(file); - - if(indexClient.addSearchResults(cdxBase, itr)) { - if (!workFlagFile.delete()) { - throw new IOException("Unable to delete " - + workFlagFile.getAbsolutePath()); - } - } - itr.close(); - numIndexed++; - } catch (IOException e) { - LOGGER.severe("FAILED index: " + file.getAbsolutePath() - + " cause: " + e.getLocalizedMessage()); - } - if(max > 0 && (numIndexed >= max)) { - break; - } - } - } - } - return numIndexed; - } - - - - public LocalResourceStore getStore() { - return store; - } - - public void setStore(LocalResourceStore store) { - this.store = store; - } - - public String getWorkDir() { - return workDir == null ? null : workDir.getAbsolutePath(); - } - - public void setWorkDir(String workDir) throws IOException { - this.workDir = DirMaker.ensureDir(workDir); - } - - public String getQueuedDir() { - return queuedDir == null ? 
null : queuedDir.getAbsolutePath(); - } - - public void setQueuedDir(String queuedDir) throws IOException { - this.queuedDir = DirMaker.ensureDir(queuedDir); - } - - public int getRunInterval() { - return runInterval; - } - - public void setRunInterval(int runInterval) { - this.runInterval = runInterval; - } - - public IndexClient getIndexClient() { - return indexClient; - } - - public void setIndexClient(IndexClient indexClient) { - this.indexClient = indexClient; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java 2008-08-08 23:35:06 UTC (rev 2530) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -1,101 +0,0 @@ -/* HttpARCResourceStore - * - * $Id$ - * - * Created on 5:29:56 PM Oct 12, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore; - -import java.io.IOException; -import java.net.URL; - -import org.archive.wayback.ResourceStore; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.resourcestore.resourcefile.ArcWarcFilenameFilter; -import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; - - -/** - * Implements ResourceStore where ARC/WARCs are accessed via HTTP 1.1 range - * requests. All files are assumed to be "rooted" at a particular HTTP URL, - * within a single directory, implying a file reverse-proxy to connect through - * to actual HTTP ARC/WARC locations. - * - * @author brad - * @version $Date$, $Revision$ - */ -public class Http11ResourceStore implements ResourceStore { - - private String urlPrefix = null; - - - public Resource retrieveResource(CaptureSearchResult result) throws IOException, - ResourceNotAvailableException { - - // extract ARC filename - String fileName = result.getFile(); - if(fileName == null || fileName.length() < 1) { - throw new IOException("No ARC/WARC name in search result..."); - } - - final long offset = result.getOffset(); - if(!fileName.endsWith(ArcWarcFilenameFilter.ARC_SUFFIX) - && !fileName.endsWith(ArcWarcFilenameFilter.ARC_GZ_SUFFIX) - && !fileName.endsWith(ArcWarcFilenameFilter.WARC_SUFFIX) - && !fileName.endsWith(ArcWarcFilenameFilter.WARC_GZ_SUFFIX)) { - fileName = fileName + ArcWarcFilenameFilter.ARC_GZ_SUFFIX; - } - - String fileUrl = urlPrefix + fileName; - Resource r = null; - try { - - r = ResourceFactory.getResource(new URL(fileUrl), offset); - - } catch (IOException e) { - - e.printStackTrace(); - throw new 
ResourceNotAvailableException("Unable to retrieve", - e.getLocalizedMessage()); - } - return r; - } - - /** - * @return the urlPrefix - */ - public String getUrlPrefix() { - return urlPrefix; - } - - /** - * @param urlPrefix the urlPrefix to set - */ - public void setUrlPrefix(String urlPrefix) { - this.urlPrefix = urlPrefix; - } - - public void shutdown() throws IOException { - // no-op - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java 2008-08-08 23:35:06 UTC (rev 2530) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -1,112 +0,0 @@ -/* LocalResourceFileResourceStore - * - * $Id$ - * - * Created on 6:17:54 PM May 29, 2008. - * - * Copyright (C) 2008 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore; - -import java.io.File; -import java.io.IOException; -import java.net.URL; - -import org.archive.wayback.ResourceStore; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.exception.ResourceNotAvailableException; -import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; -import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; - -/** - * Simple ResourceStore implementation, which uses a ResourceFileLocationDB to - * locate ARC/WARC files, that can be remote(via http://) or local paths. - * - * @author brad - * @version $Date$, $Revision$ - */ -public class LocalResourceFileResourceStore implements ResourceStore { - - private ResourceFileLocationDB db = null; - - /* (non-Javadoc) - * @see org.archive.wayback.ResourceStore#retrieveResource(org.archive.wayback.core.SearchResult) - */ - public Resource retrieveResource(CaptureSearchResult result) throws IOException, - ResourceNotAvailableException { - // extract ARC filename - String fileName = result.getFile(); - if(fileName == null || fileName.length() < 1) { - throw new IOException("No ARC/WARC name in search result..."); - } - - String urls[] = db.nameToUrls(fileName); - if(urls == null || urls.length == 0) { - throw new ResourceNotAvailableException("Unable to locate(" + - fileName + ")"); - } - - final long offset = result.getOffset(); - - Resource r = null; - // TODO: attempt multiple threads? 
- for(String url : urls) { - - try { - - if(url.startsWith("http://")) { - r = ResourceFactory.getResource(new URL(url), offset); - } else { - // assume local path: - r = ResourceFactory.getResource(new File(url), offset); - } - // TODO: attempt to grab the first few KB? The underlying - // InputStreams support mark(), so we could reset() after. - // wait for now, currently this will parse HTTP headers, - // which means we've already read some - - } catch (IOException e) { - e.printStackTrace(); - } - if(r != null) { - break; - } - } - if(r == null) { - throw new ResourceNotAvailableException("Unable to retrieve"); - } - return r; - } - - /* (non-Javadoc) - * @see org.archive.wayback.ResourceStore#shutdown() - */ - public void shutdown() throws IOException { - // NOOP - } - - public ResourceFileLocationDB getDb() { - return db; - } - - public void setDb(ResourceFileLocationDB db) { - this.db = db; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java 2008-08-08 23:35:06 UTC (rev 2530) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceStore.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -1,142 +0,0 @@ -package org.archive.wayback.resourcestore; - -import java.io.File; -import java.io.FilenameFilter; -import java.io.IOException; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -import org.archive.wayback.ResourceStore; -import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.exception.ResourceNotAvailableException; -import 
org.archive.wayback.resourcestore.indexer.ArcIndexer; -import org.archive.wayback.resourcestore.indexer.WarcIndexer; -import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; -import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.util.DirMaker; - -/** - * Class which implements a local ARC, WARC, ARC.gz, WARC.gz, ResourceStore - * including an optional automatic indexing thread - * - * @author brad - * @version $Date$, $Revision$ - */ -public class LocalResourceStore implements ResourceStore { - - private File dataDir = null; - private AutoIndexThread indexThread = null; - - private ArcIndexer arcIndexer = new ArcIndexer(); - private WarcIndexer warcIndexer = new WarcIndexer(); - public final static String ARC_EXTENSION = ".arc"; - public final static String ARC_GZ_EXTENSION = ".arc.gz"; - public final static String WARC_EXTENSION = ".warc"; - public final static String WARC_GZ_EXTENSION = ".warc.gz"; - public final static String OPEN_EXTENSION = ".open"; - private final static String[] SUFFIXES = { - "", ARC_EXTENSION, ARC_GZ_EXTENSION, WARC_EXTENSION, WARC_GZ_EXTENSION - }; - private FilenameFilter filter = new ArcWarcFilenameFilter(); - - public void init() throws ConfigurationException { - if(indexThread != null) { - indexThread.setStore(this); - indexThread.start(); - } - } - - public File getLocalFile(String fileName) { - // try adding suffixes: empty string is first in the list - File file = null; - for(String suffix : SUFFIXES) { - file = new File(dataDir,fileName + suffix); - if(file.exists() && file.canRead()) { - return file; - } - } - // this might work if the full path is in the index... - file = new File(fileName); - if(file.exists() && file.canRead()) { - return file; - } - // doh. 
- return null; - } - - public Resource retrieveResource(CaptureSearchResult result) throws IOException, - ResourceNotAvailableException { - String fileName = result.getFile(); - long offset = result.getOffset(); - File file = getLocalFile(fileName); - if (file == null) { - - // TODO: this needs to be prettied up for end user consumption.. - throw new ResourceNotAvailableException("Cannot find ARC file (" - + fileName + ")"); - } else { - - Resource r = ResourceFactory.getResource(file, offset); - return r; - } - } - - public CloseableIterator<CaptureSearchResult> indexFile(File dataFile) throws IOException { - CloseableIterator<CaptureSearchResult> itr = null; - - String name = dataFile.getName(); - if(name.endsWith(ARC_EXTENSION)) { - itr = arcIndexer.iterator(dataFile); - } else if(name.endsWith(ARC_GZ_EXTENSION)) { - itr = arcIndexer.iterator(dataFile); - } else if(name.endsWith(WARC_EXTENSION)) { - itr = warcIndexer.iterator(dataFile); - } else if(name.endsWith(WARC_GZ_EXTENSION)) { - itr = warcIndexer.iterator(dataFile); - } - return itr; - } - - public Iterator<String> fileNamesIterator() throws IOException { - if(dataDir != null) { - String[] files = dataDir.list(filter); - List<String> l = Arrays.asList(files); - return l.iterator(); - } - return null; - } - - public String getDataDir() { - return DirMaker.getAbsolutePath(dataDir); - } - - public void setDataDir(String dataDir) throws IOException { - this.dataDir = DirMaker.ensureDir(dataDir); - } - - private class ArcWarcFilenameFilter implements FilenameFilter { - public boolean accept(File dir, String name) { - File tmp = new File(dir,name); - if(tmp.isFile() && tmp.canRead()) { - return name.endsWith(ARC_EXTENSION) || - name.endsWith(ARC_GZ_EXTENSION) || - name.endsWith(WARC_GZ_EXTENSION) || - name.endsWith(WARC_EXTENSION); - } - return false; - } - } - - public AutoIndexThread getIndexThread() { - return indexThread; - } - public void setIndexThread(AutoIndexThread indexThread) { - this.indexThread = 
indexThread; - } - public void shutdown() throws IOException { - // no-op. could shut down threads - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java (from rev 2496, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocalResourceFileResourceStore.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -0,0 +1,112 @@ +/* LocalResourceFileResourceStore + * + * $Id$ + * + * Created on 6:17:54 PM May 29, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore; + +import java.io.File; +import java.io.IOException; +import java.net.URL; + +import org.archive.wayback.ResourceStore; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; + +/** + * Simple ResourceStore implementation, which uses a ResourceFileLocationDB to + * locate ARC/WARC files, that can be remote(via http://) or local paths. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class LocationDBResourceStore implements ResourceStore { + + private ResourceFileLocationDB db = null; + + /* (non-Javadoc) + * @see org.archive.wayback.ResourceStore#retrieveResource(org.archive.wayback.core.SearchResult) + */ + public Resource retrieveResource(CaptureSearchResult result) throws IOException, + ResourceNotAvailableException { + // extract ARC filename + String fileName = result.getFile(); + if(fileName == null || fileName.length() < 1) { + throw new IOException("No ARC/WARC name in search result..."); + } + + String urls[] = db.nameToUrls(fileName); + if(urls == null || urls.length == 0) { + throw new ResourceNotAvailableException("Unable to locate(" + + fileName + ")"); + } + + final long offset = result.getOffset(); + + Resource r = null; + // TODO: attempt multiple threads? + for(String url : urls) { + + try { + + if(url.startsWith("http://")) { + r = ResourceFactory.getResource(new URL(url), offset); + } else { + // assume local path: + r = ResourceFactory.getResource(new File(url), offset); + } + // TODO: attempt to grab the first few KB? 
The underlying + // InputStreams support mark(), so we could reset() after. + // wait for now, currently this will parse HTTP headers, + // which means we've already read some + + } catch (IOException e) { + e.printStackTrace(); + } + if(r != null) { + break; + } + } + if(r == null) { + throw new ResourceNotAvailableException("Unable to retrieve"); + } + return r; + } + + /* (non-Javadoc) + * @see org.archive.wayback.ResourceStore#shutdown() + */ + public void shutdown() throws IOException { + // NOOP + } + + public ResourceFileLocationDB getDb() { + return db; + } + + public void setDb(ResourceFileLocationDB db) { + this.db = db; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/LocationDBResourceStore.java ___________________________________________________________________ Added: svn:mergeinfo + Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/SimpleResourceStore.java (from rev 2496, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/Http11ResourceStore.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/SimpleResourceStore.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/SimpleResourceStore.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -0,0 +1,100 @@ +/* HttpARCResourceStore + * + * $Id$ + * + * Created on 5:29:56 PM Oct 12, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore; + +import java.io.IOException; + +import org.archive.wayback.ResourceStore; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.resourcestore.resourcefile.ArcWarcFilenameFilter; +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; + + +/** + * Implements ResourceStore where ARC/WARCs are accessed via HTTP 1.1 range + * requests. All files are assumed to be "rooted" at a particular HTTP URL, + * within a single directory, implying a file reverse-proxy to connect through + * to actual HTTP ARC/WARC locations. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class SimpleResourceStore implements ResourceStore { + + private String prefix = null; + + + public Resource retrieveResource(CaptureSearchResult result) throws IOException, + ResourceNotAvailableException { + + // extract ARC filename + String fileName = result.getFile(); + if(fileName == null || fileName.length() < 1) { + throw new IOException("No ARC/WARC name in search result..."); + } + + final long offset = result.getOffset(); + if(!fileName.endsWith(ArcWarcFilenameFilter.ARC_SUFFIX) + && !fileName.endsWith(ArcWarcFilenameFilter.ARC_GZ_SUFFIX) + && !fileName.endsWith(ArcWarcFilenameFilter.WARC_SUFFIX) + && !fileName.endsWith(ArcWarcFilenameFilter.WARC_GZ_SUFFIX)) { + fileName = fileName + ArcWarcFilenameFilter.ARC_GZ_SUFFIX; + } + + String fileUrl = prefix + fileName; + Resource r = null; + try { + + r = ResourceFactory.getResource(fileUrl, offset); + + } catch (IOException e) { + + e.printStackTrace(); + throw new ResourceNotAvailableException("Unable to retrieve", + e.getLocalizedMessage()); + } + return r; + } + + /** + * @return the prefix + */ + public String getPrefix() { + return prefix; + } + + /** + * @param prefix the prefix to set + */ + public void setPrefix(String prefix) { + this.prefix = prefix; + } + + public void shutdown() throws IOException { + // no-op + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/SimpleResourceStore.java ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision Added: svn:mergeinfo + Added: svn:eol-style + native Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2008-08-08 23:35:06 UTC (rev 2530) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2008-08-09 01:20:56 UTC (rev 2531) @@ -23,6 +23,16 @@ */ public class ResourceFactory { + public static Resource getResource(String urlOrPath, long offset) + throws IOException, ResourceNotAvailableException { + if(urlOrPath.startsWith("http://")) { + return getResource(new URL(urlOrPath), offset); + } else { + // assume local path: + return getResource(new File(urlOrPath), offset); + } + } + public static Resource getResource(File file, long offset) throws IOException, ResourceNotAvailableException { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |