From: Brad <bra...@us...> - 2005-11-17 02:52:47
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/cdx/indexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12585/src/java/org/archive/wayback/cdx/indexer Modified Files: IndexPipeline.java Log Message: FEATURE: removed dependency on StringToStringTable, now uses HashMap. FEATURE: now indexes N ARCs at a time before merging, instead of indexing everything, then merging everything. FEATURE: now only sleeps if nothing was merged, and sleeps for an increasing number of seconds when nothing new has appeared. Index: IndexPipeline.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/cdx/indexer/IndexPipeline.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** IndexPipeline.java 16 Nov 2005 03:11:29 -0000 1.1 --- IndexPipeline.java 17 Nov 2005 02:52:39 -0000 1.2 *************** *** 28,31 **** --- 28,32 ---- import java.net.MalformedURLException; import java.util.ArrayList; + import java.util.HashMap; import java.util.Iterator; import java.util.Properties; *************** *** 37,41 **** import com.sleepycat.je.DatabaseException; - import com.sun.org.apache.xml.internal.utils.StringToStringTable; /** --- 38,41 ---- *************** *** 184,189 **** } ! private StringToStringTable getQueuedFiles() { ! StringToStringTable hash = new StringToStringTable(); String entries[] = queuedDir.list(); for (int i = 0; i < entries.length; i++) { --- 184,189 ---- } ! private HashMap getQueuedFiles() { ! HashMap hash = new HashMap(); String entries[] = queuedDir.list(); for (int i = 0; i < entries.length; i++) { *************** *** 209,213 **** // this should be a method call into ResourceStore... 
private Iterator getNewArcs() { ! HashMap queued = getQueuedFiles(); ArrayList newArcs = new ArrayList(); *************** *** 217,221 **** File arc = new File(arcDir,arcs[i]); if(arc.isFile() && arcs[i].endsWith(".arc.gz")) { ! if (!queued.contains(arcs[i])) { newArcs.add(arcs[i]); } --- 217,222 ---- File arc = new File(arcDir,arcs[i]); if(arc.isFile() && arcs[i].endsWith(".arc.gz")) { ! ! if (!queued.containsKey(arcs[i])) { newArcs.add(arcs[i]); } *************** *** 253,259 **** * @throws IOException */ ! public void indexArcs(ArcIndexer indexer) throws MalformedURLException, ! IOException { Iterator toBeIndexed = getDirFilesIterator(toBeIndexedDir); while(toBeIndexed.hasNext()) { String base = (String) toBeIndexed.next(); --- 254,261 ---- * @throws IOException */ ! public void indexArcs(ArcIndexer indexer, int max) ! throws MalformedURLException, IOException { Iterator toBeIndexed = getDirFilesIterator(toBeIndexedDir); + int numIndexed = 0; while(toBeIndexed.hasNext()) { String base = (String) toBeIndexed.next(); *************** *** 275,278 **** --- 277,284 ---- + toBeIndexedFlagFile.getAbsolutePath()); } + numIndexed++; + if(max > 0 && (numIndexed >= max)) { + break; + } } } *************** *** 283,287 **** * @param dbWriter */ ! public void mergeIndex(BDBResourceIndexWriter dbWriter) { int numMerged = 0; Iterator toBeMerged = getDirFilesIterator(toBeMergedDir); --- 289,293 ---- * @param dbWriter */ ! public int mergeIndex(BDBResourceIndexWriter dbWriter) { int numMerged = 0; Iterator toBeMerged = getDirFilesIterator(toBeMergedDir); *************** *** 304,307 **** --- 310,314 ---- System.out.println("Merged " + numMerged + " files."); } + return numMerged; } *************** *** 337,341 **** private class IndexPipelineThread extends Thread { private final static int SLEEP_MILLISECONDS = 10000; ! 
private BDBResourceIndexWriter merger = null; private ArcIndexer indexer = new ArcIndexer(); --- 344,348 ---- private class IndexPipelineThread extends Thread { private final static int SLEEP_MILLISECONDS = 10000; ! private final static int MAX_TO_MERGE = 10; private BDBResourceIndexWriter merger = null; private ArcIndexer indexer = new ArcIndexer(); *************** *** 362,371 **** public void run() { while (true) { try { pipeline.queueNewArcsForIndex(); ! pipeline.indexArcs(indexer); ! pipeline.mergeIndex(merger); ! sleep(SLEEP_MILLISECONDS); } catch (InterruptedException e) { e.printStackTrace(); --- 369,384 ---- public void run() { + int sleepInterval = SLEEP_MILLISECONDS; while (true) { try { pipeline.queueNewArcsForIndex(); ! pipeline.indexArcs(indexer,MAX_TO_MERGE); ! int numMerged = pipeline.mergeIndex(merger); ! if(numMerged == 0) { ! sleep(sleepInterval); ! sleepInterval += SLEEP_MILLISECONDS; ! } else { ! sleepInterval = SLEEP_MILLISECONDS; ! } } catch (InterruptedException e) { e.printStackTrace(); |