From: <bra...@us...> - 2007-12-11 02:26:13
|
Revision: 2102
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2102&view=rev
Author:   bradtofel
Date:     2007-12-10 18:26:18 -0800 (Mon, 10 Dec 2007)

Log Message:
-----------
FEATURE: Command line main() now accepts multiple fields to canonicalize in a single pass.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java 2007-12-11 02:25:10 UTC (rev 2101)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java 2007-12-11 02:26:18 UTC (rev 2102)
@@ -27,6 +27,7 @@
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -341,7 +342,8 @@
         UrlCanonicalizer canonicalizer = new UrlCanonicalizer();
         int n = 0;
         int i = 0;
-        int column = 0;
+        ArrayList<Integer> columns = new ArrayList<Integer>();
+
         long lineNumber = 0;
         boolean cdxPassThru = false;
         String delimiter = " ";
@@ -357,7 +359,7 @@
             }
             String val = args[n+1];
             if(arg.compareTo("-f") == 0) {
-                column = Integer.parseInt(val) - 1;
+                columns.add(new Integer(val));
             } else if(arg.compareTo("-d") == 0) {
                 delimiter = val;
             } else {
@@ -365,9 +367,20 @@
             }
             n += 2;
         }
+        // place default '0' in case none specified:
+        if(columns.size() == 0) {
+            columns.add(new Integer(1));
+        }
+
+        // convert to int[]:
+        int[] cols = new int[columns.size()];
+        for(int idx = 0; idx < columns.size(); idx++) {
+            cols[idx] = columns.get(idx).intValue() - 1;
+        }
         BufferedReader r = new BufferedReader(new InputStreamReader(System.in));
         StringBuilder sb = new StringBuilder();
         String line = null;
+
         while(true) {
             try {
                 line = r.readLine();
@@ -384,27 +397,29 @@
                 continue;
             }
             String parts[] = line.split(delimiter);
-            if(column >= parts.length) {
-                System.err.println("Invalid line " + lineNumber + " (" +
-                        line + ") skipped");
-            } else {
-                try {
-                    parts[column] = canonicalizer.urlStringToKey(parts[column]);
-                } catch (URIException e) {
-                    System.err.println("Invalid URL in line " + lineNumber + " (" +
-                            line + ") skipped");
-                    e.printStackTrace();
-                    continue;
-                }
-                sb.setLength(0);
-                for(i = 0; i < parts.length; i++) {
-                    sb.append(parts[i]);
-                    if(i < (parts.length-1)) {
-                        sb.append(delimiter);
+            for(int column : cols) {
+                if(column >= parts.length) {
+                    System.err.println("Invalid line " + lineNumber + " (" +
+                            line + ") skipped");
+                } else {
+                    try {
+                        parts[column] = canonicalizer.urlStringToKey(parts[column]);
+                    } catch (URIException e) {
+                        System.err.println("Invalid URL in line " + lineNumber + " (" +
+                                line + ") skipped (" + parts[column] + ")");
+                        e.printStackTrace();
+                        continue;
                     }
                 }
-                System.out.println(sb.toString());
             }
+            sb.setLength(0);
+            for(i = 0; i < parts.length; i++) {
+                sb.append(parts[i]);
+                if(i < (parts.length-1)) {
+                    sb.append(delimiter);
+                }
+            }
+            System.out.println(sb.toString());
         }
     }
 }
\ No newline at end of file

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|