Revision: 2210
http://archive-access.svn.sourceforge.net/archive-access/?rev=2210&view=rev
Author: bradtofel
Date: 2008-02-29 18:03:30 -0800 (Fri, 29 Feb 2008)
Log Message:
-----------
BUGFIX: (ACC-13) now includes .warc.gz and .warc files in sync
FEATURE: now supports a "stream" option (the add-stream operation), which gives a lot more flexibility in integrating location-client with other processes, and is much more efficient than multiple one-shot invocations of location-client.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java 2008-03-01 01:59:24 UTC (rev 2209)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java 2008-03-01 02:03:30 UTC (rev 2210)
@@ -24,9 +24,11 @@
*/
package org.archive.wayback.resourcestore.http;
+import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
+import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Iterator;
import java.util.logging.Logger;
@@ -51,6 +53,8 @@
private final static String ARC_SUFFIX = ".arc";
private final static String ARC_GZ_SUFFIX = ".arc.gz";
+ private final static String WARC_SUFFIX = ".warc";
+ private final static String WARC_GZ_SUFFIX = ".warc.gz";
private final static String OK_RESPONSE_PREFIX = "OK ";
private HttpClient client = null;
@@ -228,7 +232,14 @@
"\n" +
"\t mark-range LOCATION-DB-URL START END\n" +
"\t\temit to STDOUT one line with the name of all ARC files\n" +
- "\t\tadded to the locationDB between marks START and END\n");
+ "\t\tadded to the locationDB between marks START and END\n" +
+ "\n" +
+ "\t add-stream LOCATION-DB-URL\n" +
+ "\t\tread lines from STDIN formatted like:\n" +
+ "\t\t\tNAME<SPACE>URL\n" +
+ "\t\tand for each line, inform locationDB that file NAME is\n" +
+ "\t\tlocated at URL\n"
+ );
System.exit(2);
}
@@ -236,142 +247,170 @@
* @param args
*/
public static void main(String[] args) {
- if(args.length < 3) {
+ if(args.length < 2) {
USAGE("");
System.exit(1);
}
String operation = args[0];
String url = args[1];
- String arc = args[2];
if(!url.startsWith("http://")) {
USAGE("URL argument 1 must begin with http://");
}
- FileLocationDBClient locationClient = new FileLocationDBClient(url);
- if(operation.equalsIgnoreCase("lookup")) {
- if(args.length < 3) {
- USAGE("lookup LOCATION-URL ARC");
- }
+ FileLocationDBClient locationClient = new FileLocationDBClient(url);
+
+ if(operation.equalsIgnoreCase("add-stream")) {
+ BufferedReader r = new BufferedReader(
+ new InputStreamReader(System.in));
+ String line;
try {
- String[] locations = locationClient.arcToUrls(arc);
- if(locations == null) {
- System.err.println("No locations for " + arc);
- System.exit(1);
+ while((line = r.readLine()) != null) {
+ String parts[] = line.split(" ");
+ if(parts.length != 2) {
+ System.err.println("Bad input(" + line + ")");
+ System.exit(2);
+ }
+ locationClient.addArcUrl(parts[0],parts[1]);
+ System.out.println("Added\t" + parts[0] + "\t" + parts[1]);
}
- for(int i=0; i <locations.length; i++) {
- System.out.println(locations[i]);
- }
} catch (IOException e) {
- System.err.println(e.getMessage());
+ e.printStackTrace();
System.exit(1);
}
- } else if(operation.equalsIgnoreCase("get-mark")) {
- if(args.length != 2) {
- USAGE("get-mark LOCATION-URL");
- }
- try {
- long mark = locationClient.getCurrentMark();
- System.out.println(mark);
- } catch (IOException e) {
- System.err.println(e.getMessage());
+ } else {
+ if(args.length < 3) {
+ USAGE("");
System.exit(1);
}
-
- } else if(operation.equalsIgnoreCase("mark-range")) {
- if(args.length != 4) {
- USAGE("mark-range LOCATION-URL START END");
- }
- long start = Long.parseLong(args[3]);
- long end = Long.parseLong(args[4]);
- try {
- Iterator<String> it =
- locationClient.getArcsBetweenMarks(start,end);
- while(it.hasNext()) {
- String next = (String) it.next();
- System.out.println(next);
+ String arc = args[2];
+ if(operation.equalsIgnoreCase("lookup")) {
+ if(args.length < 3) {
+ USAGE("lookup LOCATION-URL ARC");
}
- } catch (IOException e) {
- System.err.println(e.getMessage());
- System.exit(1);
- }
-
-
- } else if(operation.equalsIgnoreCase("add")) {
- if(args.length != 4) {
- USAGE("add LOCATION-URL ARC ARC-URL");
- }
- String arcUrl = args[3];
- if(!arcUrl.startsWith("http://")) {
- USAGE("ARC-URL argument 4 must begin with http://");
- }
- try {
- locationClient.addArcUrl(arc,arcUrl);
- System.out.println("OK");
- } catch (IOException e) {
- System.err.println(e.getMessage());
- System.exit(1);
- }
-
- } else if(operation.equalsIgnoreCase("remove")) {
-
- if(args.length != 4) {
- USAGE("remove LOCATION-URL ARC ARC-URL");
- }
- String arcUrl = args[3];
- if(!arcUrl.startsWith("http://")) {
- USAGE("ARC-URL argument 4 must begin with http://");
- }
- try {
- locationClient.removeArcUrl(arc,arcUrl);
- System.out.println("OK");
- } catch (IOException e) {
- System.err.println(e.getMessage());
- System.exit(1);
- }
-
- } else if(operation.equalsIgnoreCase("sync")) {
-
- if(args.length != 4) {
- USAGE("sync LOCATION-URL DIR DIR-URL");
- }
- File dir = new File(arc);
- String dirUrl = args[3];
- if(!dirUrl.startsWith("http://")) {
- USAGE("DIR-URL argument 4 must begin with http://");
- }
- try {
- if(!dir.isDirectory()) {
- USAGE("DIR " + arc + " is not a directory");
+ try {
+ String[] locations = locationClient.arcToUrls(arc);
+ if(locations == null) {
+ System.err.println("No locations for " + arc);
+ System.exit(1);
+ }
+ for(int i=0; i <locations.length; i++) {
+ System.out.println(locations[i]);
+ }
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ System.exit(1);
}
- FileFilter filter = new FileFilter() {
- public boolean accept(File daFile) {
- return daFile.isFile() &&
- (daFile.getName().endsWith(ARC_SUFFIX) ||
- daFile.getName().endsWith(ARC_GZ_SUFFIX));
+ } else if(operation.equalsIgnoreCase("get-mark")) {
+ if(args.length != 2) {
+ USAGE("get-mark LOCATION-URL");
+ }
+ try {
+ long mark = locationClient.getCurrentMark();
+ System.out.println(mark);
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ System.exit(1);
+ }
+
+ } else if(operation.equalsIgnoreCase("mark-range")) {
+ if(args.length != 4) {
+ USAGE("mark-range LOCATION-URL START END");
+ }
+ long start = Long.parseLong(args[3]);
+ long end = Long.parseLong(args[4]);
+ try {
+ Iterator<String> it =
+ locationClient.getArcsBetweenMarks(start,end);
+ while(it.hasNext()) {
+ String next = (String) it.next();
+ System.out.println(next);
}
- };
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ System.exit(1);
+ }
- File[] arcs = dir.listFiles(filter);
- if(arcs == null) {
- throw new IOException("Directory " + dir.getAbsolutePath() +
- " is not a directory or had an IO error");
+
+ } else if(operation.equalsIgnoreCase("add")) {
+ if(args.length != 4) {
+ USAGE("add LOCATION-URL ARC ARC-URL");
}
- for(int i = 0; i < arcs.length; i++) {
- File arcFile = arcs[i];
- String arcName = arcFile.getName();
- String arcUrl = dirUrl + arcName;
- LOGGER.info("Adding location " + arcUrl + " for arc " + arcName);
- locationClient.addArcUrl(arcName,arcUrl);
+ String arcUrl = args[3];
+ if(!arcUrl.startsWith("http://")) {
+ USAGE("ARC-URL argument 4 must begin with http://");
}
- } catch (IOException e) {
- System.err.println(e.getMessage());
- System.exit(1);
+ try {
+ locationClient.addArcUrl(arc,arcUrl);
+ System.out.println("OK");
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ System.exit(1);
+ }
+
+ } else if(operation.equalsIgnoreCase("remove")) {
+
+ if(args.length != 4) {
+ USAGE("remove LOCATION-URL ARC ARC-URL");
+ }
+ String arcUrl = args[3];
+ if(!arcUrl.startsWith("http://")) {
+ USAGE("ARC-URL argument 4 must begin with http://");
+ }
+ try {
+ locationClient.removeArcUrl(arc,arcUrl);
+ System.out.println("OK");
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ System.exit(1);
+ }
+
+ } else if(operation.equalsIgnoreCase("sync")) {
+
+ if(args.length != 4) {
+ USAGE("sync LOCATION-URL DIR DIR-URL");
+ }
+ File dir = new File(arc);
+ String dirUrl = args[3];
+ if(!dirUrl.startsWith("http://")) {
+ USAGE("DIR-URL argument 4 must begin with http://");
+ }
+ try {
+ if(!dir.isDirectory()) {
+ USAGE("DIR " + arc + " is not a directory");
+ }
+
+ FileFilter filter = new FileFilter() {
+ public boolean accept(File daFile) {
+ return daFile.isFile() &&
+ (daFile.getName().endsWith(ARC_SUFFIX) ||
+ daFile.getName().endsWith(ARC_GZ_SUFFIX) ||
+ daFile.getName().endsWith(WARC_SUFFIX) ||
+ daFile.getName().endsWith(WARC_GZ_SUFFIX));
+ }
+ };
+
+ File[] files = dir.listFiles(filter);
+ if(files == null) {
+ throw new IOException("Directory " + dir.getAbsolutePath() +
+ " is not a directory or had an IO error");
+ }
+ for(int i = 0; i < files.length; i++) {
+ File file = files[i];
+ String name = file.getName();
+ String fileUrl = dirUrl + name;
+ LOGGER.info("Adding location " + fileUrl + " for file " + name);
+ locationClient.addArcUrl(name,fileUrl);
+ }
+ } catch (IOException e) {
+ System.err.println(e.getMessage());
+ System.exit(1);
+ }
+
+ } else {
+ USAGE(" unknown operation " + operation);
}
-
- } else {
- USAGE(" unknown operation " + operation);
}
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|