Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localbdbresourceindex
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/java/org/archive/wayback/localbdbresourceindex

Added Files:
    BDBResourceIndexWriter.java LocalBDBResourceIndex.java BDBResourceIndex.java
Log Message:
Initial check-in -- pre code review

--- NEW FILE: BDBResourceIndex.java ---
package org.archive.wayback.localbdbresourceindex;

import java.io.File;
import java.text.ParseException;
import java.util.Iterator;

import org.archive.wayback.core.ResourceResult;
import org.archive.wayback.core.ResourceResults;

//import com.sleepycat.bind.tuple.TupleBinding;
//import com.sleepycat.bind.tuple.TupleInput;
//import com.sleepycat.bind.tuple.TupleOutput;
import com.sleepycat.je.Cursor;
//import com.sleepycat.je.CursorConfig;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
//import com.sleepycat.je.DatabaseNotFoundException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;

/**
 * Stores and searches {@link ResourceResult} records in a Berkeley DB JE
 * database.
 *
 * Each record is keyed by <code>url + " " + dateStr</code>; the value is the
 * record's <code>toString()</code> form, which is read back with
 * <code>ResourceResult.parseLine</code>.
 */
public class BDBResourceIndex {

    private String path;

    private String dbName;

    Environment env = null;

    Database db = null;

    // Cursor cursor = null;

    /**
     * Opens (creating if necessary) the environment and database.
     *
     * @param thePath   directory holding the JE environment
     * @param theDbName name of the database inside that environment
     * @throws Exception if the environment or database cannot be opened
     */
    public BDBResourceIndex(final String thePath, final String theDbName)
            throws Exception {
        super();
        initializeDB(thePath, theDbName);
    }

    /**
     * Opens a non-transactional environment at <code>thePath</code> and a
     * non-transactional database named <code>theDbName</code>, creating
     * either one if it does not exist yet.
     *
     * @param thePath   directory holding the JE environment
     * @param theDbName name of the database inside that environment
     * @throws Exception if the environment or database cannot be opened
     */
    protected void initializeDB(final String thePath, final String theDbName)
            throws Exception {
        path = thePath;
        dbName = theDbName;

        EnvironmentConfig environmentConfig = new EnvironmentConfig();
        environmentConfig.setAllowCreate(true);
        environmentConfig.setTransactional(false);
        File file = new File(path);
        env = new Environment(file, environmentConfig);

        DatabaseConfig databaseConfig = new DatabaseConfig();
        databaseConfig.setAllowCreate(true);
        databaseConfig.setTransactional(false);
        // perform other database configurations

        db = env.openDatabase(null, dbName, databaseConfig);
    }

    /**
     * Closes the database and then the environment, skipping whichever was
     * never opened.
     *
     * @throws DatabaseException if either close fails
     */
    protected void shutdownDB() throws DatabaseException {
        try {
            if (db != null) {
                db.close();
            }
        } finally {
            // Still attempt to release the environment when closing the
            // database failed.
            if (env != null) {
                env.close();
            }
        }
    }

    /**
     * Returns the records whose URL equals <code>url</code> and whose date
     * string lies in <code>[firstDate, lastDate]</code> (string comparison).
     *
     * Database and parse errors are printed and whatever was collected so far
     * is returned.
     *
     * @param url        exact URL to match
     * @param firstDate  inclusive lower bound on the record date string
     * @param lastDate   inclusive upper bound on the record date string
     * @param exactHost  currently unused
     * @param maxRecords stop after this many matching records
     * @return the matching records, possibly empty
     */
    protected ResourceResults doUrlSearch(final String url,
            final String firstDate, final String lastDate,
            final String exactHost, final int maxRecords) {
        ResourceResults results = new ResourceResults();

        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry value = new DatabaseEntry();
        int numRecords = 0;

        // Start the scan at the first key at or after "<url> <firstDate>".
        String searchStart = url + " " + firstDate;
        key.setData(searchStart.getBytes());
        key.setPartial(false);
        try {
            Cursor cursor = db.openCursor(null, null);
            try {
                OperationStatus status = cursor.getSearchKeyRange(key, value,
                        LockMode.DEFAULT);
                while (status == OperationStatus.SUCCESS) {
                    // String keyString = new String(key.getData());
                    String valueString = new String(value.getData());
                    ResourceResult result = new ResourceResult();
                    result.parseLine(valueString, 0);
                    if (!result.getUrl().equals(url)) {
                        // Walked past the last record for this URL.
                        break;
                    }
                    String dateStr = result.getTimestamp().getDateStr();
                    if (dateStr.compareTo(lastDate) > 0) {
                        break;
                    }
                    if (dateStr.compareTo(firstDate) >= 0) {
                        results.addResourceResult(result);
                        numRecords++;
                        if (numRecords >= maxRecords) {
                            break;
                        }
                    }
                    status = cursor.getNext(key, value, LockMode.DEFAULT);
                }
            } finally {
                // Release the cursor even when parsing or a cursor
                // operation throws.
                cursor.close();
            }
        } catch (DatabaseException dbe) {
            dbe.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return results;
    }

    /**
     * Returns the records whose URL starts with <code>urlPrefix</code> and
     * whose date string lies in <code>[firstDate, lastDate]</code> (string
     * comparison). Records outside the date range are skipped rather than
     * ending the scan.
     *
     * Database and parse errors are printed and whatever was collected so far
     * is returned.
     *
     * @param urlPrefix  URL prefix to match
     * @param firstDate  inclusive lower bound on the record date string
     * @param lastDate   inclusive upper bound on the record date string
     * @param exactHost  currently unused
     * @param maxRecords stop after this many matching records
     * @return the matching records, possibly empty
     */
    protected ResourceResults doUrlPrefixSearch(final String urlPrefix,
            final String firstDate, final String lastDate,
            final String exactHost, final int maxRecords) {
        ResourceResults results = new ResourceResults();

        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry value = new DatabaseEntry();
        int numRecords = 0;

        String searchStart = urlPrefix;
        key.setData(searchStart.getBytes());
        key.setPartial(false);
        try {
            Cursor cursor = db.openCursor(null, null);
            try {
                OperationStatus status = cursor.getSearchKeyRange(key, value,
                        LockMode.DEFAULT);
                while (status == OperationStatus.SUCCESS) {
                    String valueString = new String(value.getData());
                    ResourceResult result = new ResourceResult();
                    result.parseLine(valueString, 0);
                    if (!result.getUrl().startsWith(urlPrefix)) {
                        // Walked past the last record under this prefix.
                        break;
                    }
                    String dateStr = result.getTimestamp().getDateStr();
                    if ((dateStr.compareTo(lastDate) <= 0)
                            && (dateStr.compareTo(firstDate) >= 0)) {
                        results.addResourceResult(result);
                        numRecords++;
                        if (numRecords >= maxRecords) {
                            break;
                        }
                    }
                    status = cursor.getNext(key, value, LockMode.DEFAULT);
                }
            } finally {
                // Release the cursor even when parsing or a cursor
                // operation throws.
                cursor.close();
            }
        } catch (DatabaseException dbe) {
            dbe.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return results;
    }

    /**
     * Writes every record in <code>results</code> to the database, keyed by
     * <code>url + " " + dateStr</code>.
     *
     * Failures are reported to the caller instead of being printed and
     * ignored, so a caller never mistakes a failed import for a successful
     * one.
     *
     * @param results records to store
     * @throws Exception if a put reports a non-success status, or if the
     *                   database raises a DatabaseException
     */
    protected void addResults(ResourceResults results) throws Exception {
        Iterator itr = results.iterator();
        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry value = new DatabaseEntry();

        Cursor cursor = db.openCursor(null, null);
        try {
            while (itr.hasNext()) {
                ResourceResult result = (ResourceResult) itr.next();
                String keyString = result.getUrl() + " "
                        + result.getTimestamp().getDateStr();
                String valueString = result.toString();
                key.setData(keyString.getBytes());
                value.setData(valueString.getBytes());
                OperationStatus status = cursor.put(key, value);
                if (status != OperationStatus.SUCCESS) {
                    throw new Exception("oops, put had non-success status");
                }
            }
        } finally {
            // Release the cursor on both the success and the failure path.
            cursor.close();
        }
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }
}

--- NEW FILE: BDBResourceIndexWriter.java ---
package org.archive.wayback.localbdbresourceindex;

import java.io.File;
import java.io.RandomAccessFile;

import org.archive.wayback.core.ResourceResult;
import org.archive.wayback.core.ResourceResults;

import com.sleepycat.je.DatabaseException;

/**
 * Imports index files, one record per line, into a {@link BDBResourceIndex}.
 */
public class BDBResourceIndexWriter {

    private BDBResourceIndex db = null;

    public BDBResourceIndexWriter() {
        super();
    }

    /**
     * Opens a new index for this writer.
     *
     * @param thePath   directory holding the JE environment
     * @param theDbName name of the database inside that environment
     * @throws Exception if the index cannot be opened
     */
    protected void init(final String thePath, final String theDbName)
            throws Exception {
        db = new BDBResourceIndex(thePath, theDbName);
    }

    /**
     * Makes this writer use an index that is already open.
     *
     * @param db index to write into
     */
    protected void init(BDBResourceIndex db) {
        this.db = db;
    }

    /**
     * Shuts down the underlying index; does nothing when no index was ever
     * set.
     *
     * @throws DatabaseException if the index fails to close
     */
    protected void shutdown() throws DatabaseException {
        if (db != null) {
            db.shutdownDB();
        }
    }

    /**
     * Parses <code>filePath</code> and stores every record it contains.
     *
     * @param filePath file to import
     * @throws IllegalStateException if no index has been set with init
     * @throws Exception             if the file cannot be read or parsed, or
     *                               the records cannot be stored
     */
    public void importFile(String filePath) throws Exception {
        if (db == null) {
            throw new IllegalStateException(
                    "init() must be called before importFile()");
        }
        ResourceResults results = readFile(filePath);
        db.addResults(results);
    }

    /**
     * Reads <code>filePath</code> line by line into records. The first line
     * is skipped when it contains <code>" CDX "</code>.
     *
     * @param filePath file to read
     * @return the parsed records
     * @throws Exception if the file cannot be read or a line fails to parse
     */
    private ResourceResults readFile(String filePath) throws Exception {
        File file = new File(filePath);
        RandomAccessFile raFile = new RandomAccessFile(file, "r");
        try {
            ResourceResults results = new ResourceResults();
            int lineNumber = 0;
            String line;
            while ((line = raFile.readLine()) != null) {
                lineNumber++;
                if ((lineNumber == 1) && (line.contains(" CDX "))) {
                    continue;
                }
                ResourceResult result = new ResourceResult();
                result.parseLine(line, lineNumber);
                results.addResourceResult(result);
            }
            return results;
        } finally {
            // Release the file handle whether or not parsing succeeded.
            raFile.close();
        }
    }

    /**
     * Imports one file: <code>args[0]</code> is the index directory,
     * <code>args[1]</code> the database name and <code>args[2]</code> the
     * file to import.
     *
     * @param args
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: BDBResourceIndexWriter"
                    + " <indexPath> <dbName> <fileToImport>");
            return;
        }
        try {
            BDBResourceIndexWriter idx = new BDBResourceIndexWriter();
            idx.init(args[0], args[1]);
            idx.importFile(args[2]);
            idx.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

--- NEW FILE: LocalBDBResourceIndex.java ---
package org.archive.wayback.localbdbresourceindex;

import java.io.File;
import java.io.IOException;
import java.util.Properties;

import org.archive.wayback.ResourceIndex;
import org.archive.wayback.arcindexer.IndexPipeline;
import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.WMRequest;
import org.archive.wayback.exception.BadQueryException;
import org.archive.wayback.exception.WaybackException;

/**
 * {@link ResourceIndex} backed by a local {@link BDBResourceIndex}.
 * Optionally starts one background thread that runs the index pipeline and
 * merges the files it produces into the database.
 */
public class LocalBDBResourceIndex implements ResourceIndex {
    private static Thread indexUpdateThread = null;

    private final static String INDEX_PATH = "resourceindex.indexPath";

    private final static String DB_NAME = "resourceindex.dbName";

    private final static String ARC_PATH = "resourceindex.arcPath";

    private final static String WORK_PATH = "resourceindex.workPath";

    private final static String RUN_PIPELINE = "resourceindex.runPipeline";

    private final static int MAX_RECORDS = 1000;

    private BDBResourceIndex db = null;

    public LocalBDBResourceIndex() {
        super();
    }

    /**
     * Opens the database and, when the run-pipeline property is present,
     * starts the background update thread if none is running yet.
     *
     * @param p configuration; the index path, ARC path, work path and
     *          database name properties are required
     * @throws IllegalArgumentException if a required property is missing or
     *                                  empty
     * @throws Exception                if the database or pipeline cannot be
     *                                  initialized
     */
    public void init(Properties p) throws Exception {
        System.out.println("initializing LocalDBDResourceIndex...");
        String dbPath = requireProperty(p, INDEX_PATH);
        String arcPath = requireProperty(p, ARC_PATH);
        String workPath = requireProperty(p, WORK_PATH);
        String dbName = requireProperty(p, DB_NAME);
        String runPipeline = p.getProperty(RUN_PIPELINE);

        db = new BDBResourceIndex(dbPath, dbName);

        if (runPipeline != null) {
            // QUESTION: are we sure there will be a single instance
            System.out
                    .println("LocalDBDResourceIndex starting pipeline thread...");
            if (indexUpdateThread == null) {
                IndexPipeline pipeline = new IndexPipeline();
                String mergeDir = workPath + "/mergey";
                pipeline.init(arcPath, mergeDir, workPath);
                startIndexUpdateThead(db, pipeline);
            }
        }
    }

    /**
     * Looks up a required, non-empty property. Uses
     * <code>Properties.getProperty</code>, so values supplied through a
     * defaults table are honoured as well.
     *
     * @param p    configuration to read
     * @param name property name
     * @return the property value, never null or empty
     * @throws IllegalArgumentException if the property is missing or empty
     */
    private static String requireProperty(Properties p, String name) {
        String propertyValue = p.getProperty(name);
        if (propertyValue == null || (propertyValue.length() <= 0)) {
            throw new IllegalArgumentException("Failed to find " + name);
        }
        return propertyValue;
    }

    /**
     * Runs the search that matches the request type: an exact-URL search for
     * retrieval and query requests, a URL-prefix search for path queries.
     *
     * @param request request describing the URL and date range
     * @return up to MAX_RECORDS matching records
     * @throws BadQueryException if the request is none of the known types
     */
    public ResourceResults query(WMRequest request) throws IOException,
            WaybackException {
        // TODO add check of WMRequest and call different methods:
        String searchHost = request.getRequestURI().getHostBasename();
        String searchPath = request.getRequestURI().getEscapedPathQuery();
        String searchUrl = searchHost + searchPath;

        if (request.isRetrieval() || request.isQuery()) {
            return db.doUrlSearch(searchUrl, request.getStartTimestamp()
                    .getDateStr(), request.getEndTimestamp().getDateStr(),
                    null, MAX_RECORDS);
        } else if (request.isPathQuery()) {
            return db.doUrlPrefixSearch(searchUrl, request
                    .getStartTimestamp().getDateStr(), request
                    .getEndTimestamp().getDateStr(), null, MAX_RECORDS);
        } else {
            throw new BadQueryException("Unknown query type");
        }
    }

    /**
     * Starts the background update thread unless one was already started.
     *
     * @param bdb      index the thread merges new files into
     * @param pipeline pipeline the thread runs
     */
    protected synchronized void startIndexUpdateThead(
            final BDBResourceIndex bdb, IndexPipeline pipeline) {
        // indexUpdateThread is static, so the check-and-set has to be guarded
        // by a lock shared by all instances; the instance monitor alone does
        // not stop two instances from each starting a thread.
        synchronized (LocalBDBResourceIndex.class) {
            if (indexUpdateThread != null) {
                return;
            }
            indexUpdateThread = new IndexUpdateThread(bdb, pipeline);
            indexUpdateThread.start();
        }
    }

    /**
     * Daemon thread that repeatedly runs the pipeline, merges the files
     * found in the pipeline's merge directory into the index, then sleeps.
     */
    private static class IndexUpdateThread extends Thread {
        private final static int SLEEP_MILLISECONDS = 10000;

        BDBResourceIndexWriter importer = null;

        IndexPipeline pipeline = null;

        public IndexUpdateThread(final BDBResourceIndex bdb,
                IndexPipeline pipeline) {
            super("IndexUpdateThread");
            super.setDaemon(true);
            this.importer = new BDBResourceIndexWriter();
            importer.init(bdb);
            this.pipeline = pipeline;
        }

        /**
         * Loops until the thread is interrupted.
         */
        public void run() {
            while (!isInterrupted()) {
                try {
                    indexArcs();
                    mergeIndex();
                    sleep(SLEEP_MILLISECONDS);
                } catch (InterruptedException e) {
                    // An interrupt is a request to stop: restore the flag
                    // and leave the loop instead of carrying on.
                    Thread.currentThread().interrupt();
                    return;
                }
                // System.out.println("I'm running!");
            }
        }

        /**
         * Runs one pipeline pass; I/O failures are printed and the pass is
         * retried on the next cycle.
         */
        private void indexArcs() {
            try {
                pipeline.indexArcs();
                // System.out.println("Indexed...");
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Imports every regular file in the merge directory and deletes each
         * one after a successful import. A file that fails to import or to
         * delete is reported and left in place.
         */
        private void mergeIndex() {
            String newFiles[] = pipeline.mergeDir.list();
            if (newFiles == null) {
                // list() yields null when the directory is missing or
                // unreadable; there is nothing to merge this cycle.
                return;
            }
            int numMerged = 0;
            for (int i = 0; i < newFiles.length; i++) {
                // TODO: Special handling of encoding and date.
                File newFile = new File(pipeline.mergeDir.getAbsolutePath(),
                        newFiles[i]);
                if (newFile.isFile()) {
                    try {
                        importer.importFile(newFile.getAbsolutePath());
                        if (!newFile.delete()) {
                            throw new IOException("Unable to unlink "
                                    + newFile.getAbsolutePath());
                        }
                        numMerged++;
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            if (numMerged > 0) {
                System.out.println("Merged " + numMerged + " files.");
            }
        }
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }
}
|