From: <bra...@us...> - 2008-06-24 23:43:51
|
Revision: 2311 http://archive-access.svn.sourceforge.net/archive-access/?rev=2311&view=rev Author: bradtofel Date: 2008-06-24 16:43:59 -0700 (Tue, 24 Jun 2008) Log Message: ----------- REFACTOR: Renaming to support other future UpdatableResourceIndex implementations. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java 2008-06-24 23:41:39 UTC (rev 2310) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java 2008-06-24 23:43:59 UTC (rev 2311) @@ -1,401 +0,0 @@ -/* BDBIndexUpdater - * - * $Id$ - * - * Created on 2:59:40 PM Oct 12, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.updater; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.archive.wayback.bdb.BDBRecord; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.resourceindex.bdb.BDBIndex; -import org.archive.wayback.resourceindex.bdb.SearchResultToBDBRecordAdapter; -import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; -//import org.archive.wayback.resourcestore.ArcIndexer; -import org.archive.wayback.util.AdaptedIterator; -import org.archive.wayback.util.flatfile.FlatFile; - -/** - * Class which starts a background thread that repeatedly scans an incoming - * directory and merges files found therein(which are assumed to be in CDX - * format) with a BDBIndex. Optional configurations include: - * - * target directory where merged files are moved to (otherwise deleted) - * target directory where failed failed are moved(otherwise left in place) - * milliseconds between scans of the incoming directory(default 10000) - * - * @author brad - * @version $Date$, $Revision$ - */ -public class BDBIndexUpdater { - /** - * Logger for this class - */ - private static final Logger LOGGER = - Logger.getLogger(BDBIndexUpdater.class.getName()); - - private final static int DEFAULT_RUN_INTERVAL_MS = 10000; - - private BDBIndex index = null; - - private File incoming = null; - - private File merged = null; - - private File failed = null; - - private int runInterval = DEFAULT_RUN_INTERVAL_MS; - - /** - * Thread object of update thread -- also is flag indicating if the thread - * has already been started. Access to it is synchronized. - */ - private Thread updateThread = null; - - /** - * Default constructor - */ - public BDBIndexUpdater() { - - } - /** - * @param index - * @param incoming - */ - public BDBIndexUpdater(BDBIndex index, File incoming) { - this.index = index; - this.incoming = incoming; - } - - /** - * start the background index merging thread - * @throws ConfigurationException - */ - public void init() throws ConfigurationException { - if(index == null) { - throw new ConfigurationException("No index target on bdb updater"); - } - if(incoming == null) { - throw new ConfigurationException("No incoming on bdb updater"); - } - startUpdateThread(); - } - - /** Ensure the argument directory exists - * @param dir - * @throws IOException - */ - private void ensureDir(File dir) throws IOException { - if (!dir.isDirectory() && !dir.mkdirs()) { - throw new IOException("FAILED to create " + dir.getAbsolutePath()); - } - } - - /** - * start a background thread that merges new CDX files in incoming into - * the BDBIndex. - * - * @throws ConfigurationException - */ - public void startup() throws ConfigurationException { - try { - ensureDir(incoming); - if(merged != null) ensureDir(merged); - if(failed != null) ensureDir(failed); - } catch (IOException e) { - e.printStackTrace(); - throw new ConfigurationException(e.getMessage()); - } - - if (updateThread == null) { - startUpdateThread(); - } - } - - /** - * start the BDBIndexUpdaterThread thread, which will scan for new cdx files - * in the incoming directory, and add them to the BDBIndex. - */ - private synchronized void startUpdateThread() { - if (updateThread != null) { - return; - } - updateThread = new BDBIndexUpdaterThread(this,runInterval); - updateThread.start(); - } - - - private boolean mergeFile(File cdxFile) { - boolean added = false; - try { - FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); - AdaptedIterator<String,SearchResult> searchResultItr = - new AdaptedIterator<String,SearchResult>( - ffile.getSequentialIterator(), - new CDXLineToSearchResultAdapter()); - Iterator<BDBRecord> it = new AdaptedIterator<SearchResult,BDBRecord> - (searchResultItr,new SearchResultToBDBRecordAdapter()); - - index.insertRecords(it); - added = true; - } catch (IOException e) { - e.printStackTrace(); - } - return added; - } - - private File getTargetFile(File f, File targetDir) { - File target = new File(targetDir, f.getName()); - int x = 0; - while(target.exists()) { - if(x++ > 255) { - throw new RuntimeException("too many " - + "duplicates of file " + f.getAbsolutePath() + - " in " + targetDir.getAbsolutePath()); - } - target = new File(targetDir,f.getName() + "." + x); - } - return target; - } - - private File ensureDir(String path) throws ConfigurationException { - if(path.length() < 1) { - throw new ConfigurationException("Empty directory path"); - } - File dir = new File(path); - if(dir.exists()) { - if(!dir.isDirectory()) { - throw new ConfigurationException("path " + path + "exists" + - "but is not a directory"); - } - } else { - if(!dir.mkdirs()) { - throw new ConfigurationException("unable to create directory" + - " at " + path); - } - } - return dir; - } - - private void handleMerged(File f) { - if (merged == null) { - if (!f.delete()) { - // big problems... lets exit - throw new RuntimeException("Unable to delete " - + f.getAbsolutePath()); - } - LOGGER.info("Removed merged file " + f.getAbsolutePath()); - } else { - // move to merged: - File target = getTargetFile(f,merged); - if (!f.renameTo(target)) { - throw new RuntimeException("FAILED rename" + "(" - + f.getAbsolutePath() + ") to " + "(" - + target.getAbsolutePath() + ")"); - } - LOGGER.info("Renamed merged file " + f.getAbsolutePath() + " to " + - target.getAbsolutePath()); - } - } - - private void handleFailed(File f) { - if (failed == null) { - // nothing much to do.. just complain and leave it. - LOGGER.info("FAILED INDEX: " + f.getAbsolutePath()); - } else { - // move to failed: - File target = getTargetFile(f,failed); - if (!f.renameTo(target)) { - throw new RuntimeException("FAILED rename" + "(" - + f.getAbsolutePath() + ") to " + "(" - + target.getAbsolutePath() + ")"); - } - LOGGER.info("Renamed failed merge file " + f.getAbsolutePath() + - " to " + target.getAbsolutePath()); - } - } - - protected int mergeAll() { - int numMerged = 0; - File incomingFiles[] = incoming.listFiles(); - int i = 0; - for (i = 0; i < incomingFiles.length; i++) { - File f = incomingFiles[i]; - if (f.isFile()) { - if (mergeFile(f)) { - handleMerged(f); - numMerged++; - } else { - handleFailed(f); - } - } - } - return numMerged; - } - - /** - * @return the index - */ - public BDBIndex getIndex() { - return index; - } - - /** - * @param index the index to set - */ - public void setIndex(BDBIndex index) { - this.index = index; - } - - /** - * @return the incoming - */ - public String getIncoming() { - if(incoming == null) { - return null; - } - return incoming.getAbsolutePath(); - } - - /** - * @param incoming the incoming to set - * @throws ConfigurationException - */ - public void setIncoming(String incoming) throws ConfigurationException { - this.incoming = ensureDir(incoming); - } - - - /** - * @return the merged - */ - public String getMerged() { - if(merged == null) { - return null; - } - return merged.getAbsolutePath(); - } - - /** - * @param merged The merged to set. - * @throws ConfigurationException - */ - public void setMerged(String merged) throws ConfigurationException { - this.merged = ensureDir(merged); - } - /** - * @param merged - * @throws IOException - */ - public void setMerged(File merged) throws IOException { - ensureDir(merged); - this.merged = merged; - } - - /** - * @return the failed - */ - public String getFailed() { - if(failed == null) { - return null; - } - return failed.getAbsolutePath(); - } - - /** - * @param failed The failed to set. - * @throws ConfigurationException - */ - public void setFailed(String failed) throws ConfigurationException { - this.failed = ensureDir(failed); - } - /** - * @param failed - * @throws IOException - */ - public void setFailed(File failed) throws IOException { - ensureDir(failed); - this.failed = failed; - } - - /** - * @return the runInterval - */ - public int getRunInterval() { - return runInterval; - } - - /** - * @param runInterval The runInterval to set. - */ - public void setRunInterval(int runInterval) { - this.runInterval = runInterval; - } - /** - * Thread that repeatedly calls mergeAll on the BDBIndexUpdater. - * - * @author Brad Tofel - * @version $Date$, $Revision$ - */ - private class BDBIndexUpdaterThread extends Thread { - /** - * object which merges CDX files with the BDBResourceIndex - */ - private BDBIndexUpdater updater = null; - - private int runInterval; - - /** - * @param updater - * @param runInterval - */ - public BDBIndexUpdaterThread(BDBIndexUpdater updater, int runInterval) { - super("BDBIndexUpdaterThread"); - super.setDaemon(true); - this.updater = updater; - this.runInterval = runInterval; - LOGGER.info("BDBIndexUpdaterThread is alive."); - } - - public void run() { - int sleepInterval = runInterval; - while (true) { - try { - int numMerged = updater.mergeAll(); - if (numMerged == 0) { - sleep(sleepInterval); - sleepInterval += runInterval; - } else { - sleepInterval = runInterval; - } - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - } - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java (from rev 2309, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/BDBIndexUpdater.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/LocalResourceIndexUpdater.java 2008-06-24 23:43:59 UTC (rev 2311) @@ -0,0 +1,401 @@ +/* BDBIndexUpdater + * + * $Id$ + * + * Created on 2:59:40 PM Oct 12, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.updater; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.archive.wayback.bdb.BDBRecord; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.resourceindex.bdb.BDBIndex; +import org.archive.wayback.resourceindex.bdb.SearchResultToBDBRecordAdapter; +import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; +//import org.archive.wayback.resourcestore.ArcIndexer; +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.flatfile.FlatFile; + +/** + * Class which starts a background thread that repeatedly scans an incoming + * directory and merges files found therein(which are assumed to be in CDX + * format) with a BDBIndex. Optional configurations include: + * + * target directory where merged files are moved to (otherwise deleted) + * target directory where failed failed are moved(otherwise left in place) + * milliseconds between scans of the incoming directory(default 10000) + * + * @author brad + * @version $Date$, $Revision$ + */ +public class LocalResourceIndexUpdater { + /** + * Logger for this class + */ + private static final Logger LOGGER = + Logger.getLogger(LocalResourceIndexUpdater.class.getName()); + + private final static int DEFAULT_RUN_INTERVAL_MS = 10000; + + private BDBIndex index = null; + + private File incoming = null; + + private File merged = null; + + private File failed = null; + + private int runInterval = DEFAULT_RUN_INTERVAL_MS; + + /** + * Thread object of update thread -- also is flag indicating if the thread + * has already been started. Access to it is synchronized. + */ + private Thread updateThread = null; + + /** + * Default constructor + */ + public LocalResourceIndexUpdater() { + + } + /** + * @param index + * @param incoming + */ + public LocalResourceIndexUpdater(BDBIndex index, File incoming) { + this.index = index; + this.incoming = incoming; + } + + /** + * start the background index merging thread + * @throws ConfigurationException + */ + public void init() throws ConfigurationException { + if(index == null) { + throw new ConfigurationException("No index target on bdb updater"); + } + if(incoming == null) { + throw new ConfigurationException("No incoming on bdb updater"); + } + startUpdateThread(); + } + + /** Ensure the argument directory exists + * @param dir + * @throws IOException + */ + private void ensureDir(File dir) throws IOException { + if (!dir.isDirectory() && !dir.mkdirs()) { + throw new IOException("FAILED to create " + dir.getAbsolutePath()); + } + } + + /** + * start a background thread that merges new CDX files in incoming into + * the BDBIndex. + * + * @throws ConfigurationException + */ + public void startup() throws ConfigurationException { + try { + ensureDir(incoming); + if(merged != null) ensureDir(merged); + if(failed != null) ensureDir(failed); + } catch (IOException e) { + e.printStackTrace(); + throw new ConfigurationException(e.getMessage()); + } + + if (updateThread == null) { + startUpdateThread(); + } + } + + /** + * start the BDBIndexUpdaterThread thread, which will scan for new cdx files + * in the incoming directory, and add them to the BDBIndex. + */ + private synchronized void startUpdateThread() { + if (updateThread != null) { + return; + } + updateThread = new BDBIndexUpdaterThread(this,runInterval); + updateThread.start(); + } + + + private boolean mergeFile(File cdxFile) { + boolean added = false; + try { + FlatFile ffile = new FlatFile(cdxFile.getAbsolutePath()); + AdaptedIterator<String,SearchResult> searchResultItr = + new AdaptedIterator<String,SearchResult>( + ffile.getSequentialIterator(), + new CDXLineToSearchResultAdapter()); + Iterator<BDBRecord> it = new AdaptedIterator<SearchResult,BDBRecord> + (searchResultItr,new SearchResultToBDBRecordAdapter()); + + index.insertRecords(it); + added = true; + } catch (IOException e) { + e.printStackTrace(); + } + return added; + } + + private File getTargetFile(File f, File targetDir) { + File target = new File(targetDir, f.getName()); + int x = 0; + while(target.exists()) { + if(x++ > 255) { + throw new RuntimeException("too many " + + "duplicates of file " + f.getAbsolutePath() + + " in " + targetDir.getAbsolutePath()); + } + target = new File(targetDir,f.getName() + "." + x); + } + return target; + } + + private File ensureDir(String path) throws ConfigurationException { + if(path.length() < 1) { + throw new ConfigurationException("Empty directory path"); + } + File dir = new File(path); + if(dir.exists()) { + if(!dir.isDirectory()) { + throw new ConfigurationException("path " + path + "exists" + + "but is not a directory"); + } + } else { + if(!dir.mkdirs()) { + throw new ConfigurationException("unable to create directory" + + " at " + path); + } + } + return dir; + } + + private void handleMerged(File f) { + if (merged == null) { + if (!f.delete()) { + // big problems... lets exit + throw new RuntimeException("Unable to delete " + + f.getAbsolutePath()); + } + LOGGER.info("Removed merged file " + f.getAbsolutePath()); + } else { + // move to merged: + File target = getTargetFile(f,merged); + if (!f.renameTo(target)) { + throw new RuntimeException("FAILED rename" + "(" + + f.getAbsolutePath() + ") to " + "(" + + target.getAbsolutePath() + ")"); + } + LOGGER.info("Renamed merged file " + f.getAbsolutePath() + " to " + + target.getAbsolutePath()); + } + } + + private void handleFailed(File f) { + if (failed == null) { + // nothing much to do.. just complain and leave it. + LOGGER.info("FAILED INDEX: " + f.getAbsolutePath()); + } else { + // move to failed: + File target = getTargetFile(f,failed); + if (!f.renameTo(target)) { + throw new RuntimeException("FAILED rename" + "(" + + f.getAbsolutePath() + ") to " + "(" + + target.getAbsolutePath() + ")"); + } + LOGGER.info("Renamed failed merge file " + f.getAbsolutePath() + + " to " + target.getAbsolutePath()); + } + } + + protected int mergeAll() { + int numMerged = 0; + File incomingFiles[] = incoming.listFiles(); + int i = 0; + for (i = 0; i < incomingFiles.length; i++) { + File f = incomingFiles[i]; + if (f.isFile()) { + if (mergeFile(f)) { + handleMerged(f); + numMerged++; + } else { + handleFailed(f); + } + } + } + return numMerged; + } + + /** + * @return the index + */ + public BDBIndex getIndex() { + return index; + } + + /** + * @param index the index to set + */ + public void setIndex(BDBIndex index) { + this.index = index; + } + + /** + * @return the incoming + */ + public String getIncoming() { + if(incoming == null) { + return null; + } + return incoming.getAbsolutePath(); + } + + /** + * @param incoming the incoming to set + * @throws ConfigurationException + */ + public void setIncoming(String incoming) throws ConfigurationException { + this.incoming = ensureDir(incoming); + } + + + /** + * @return the merged + */ + public String getMerged() { + if(merged == null) { + return null; + } + return merged.getAbsolutePath(); + } + + /** + * @param merged The merged to set. + * @throws ConfigurationException + */ + public void setMerged(String merged) throws ConfigurationException { + this.merged = ensureDir(merged); + } + /** + * @param merged + * @throws IOException + */ + public void setMerged(File merged) throws IOException { + ensureDir(merged); + this.merged = merged; + } + + /** + * @return the failed + */ + public String getFailed() { + if(failed == null) { + return null; + } + return failed.getAbsolutePath(); + } + + /** + * @param failed The failed to set. + * @throws ConfigurationException + */ + public void setFailed(String failed) throws ConfigurationException { + this.failed = ensureDir(failed); + } + /** + * @param failed + * @throws IOException + */ + public void setFailed(File failed) throws IOException { + ensureDir(failed); + this.failed = failed; + } + + /** + * @return the runInterval + */ + public int getRunInterval() { + return runInterval; + } + + /** + * @param runInterval The runInterval to set. + */ + public void setRunInterval(int runInterval) { + this.runInterval = runInterval; + } + /** + * Thread that repeatedly calls mergeAll on the BDBIndexUpdater. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ + private class BDBIndexUpdaterThread extends Thread { + /** + * object which merges CDX files with the BDBResourceIndex + */ + private LocalResourceIndexUpdater updater = null; + + private int runInterval; + + /** + * @param updater + * @param runInterval + */ + public BDBIndexUpdaterThread(LocalResourceIndexUpdater updater, int runInterval) { + super("BDBIndexUpdaterThread"); + super.setDaemon(true); + this.updater = updater; + this.runInterval = runInterval; + LOGGER.info("BDBIndexUpdaterThread is alive."); + } + + public void run() { + int sleepInterval = runInterval; + while (true) { + try { + int numMerged = updater.mergeAll(); + if (numMerged == 0) { + sleep(sleepInterval); + sleepInterval += runInterval; + } else { + sleepInterval = runInterval; + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |