From: <bra...@us...> - 2011-05-25 01:47:00
|
Revision: 3455 http://archive-access.svn.sourceforge.net/archive-access/?rev=3455&view=rev Author: bradtofel Date: 2011-05-25 01:46:53 +0000 (Wed, 25 May 2011) Log Message: ----------- OPTIMIZ: now use static reference to ByteOp.UTF8 Charset object. Previously, it was either being "assumed" as default, as in, not specified, or referenced by name, causing a lookup of the Charset object, which was causing lock contention Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotRules.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -31,6 +31,8 @@ import java.util.logging.Logger; +import org.archive.wayback.util.ByteOp; + /** * Class which parses a robots.txt file, storing the rules contained therein, * and then allows for testing if path/userAgent tuples are blocked by those @@ -80,7 +82,7 @@ public void parse(InputStream is) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader( - (InputStream) is)); + (InputStream) is,ByteOp.UTF8)); String read; ArrayList<String> current = null; while (br != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBIndex.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -34,6 +34,7 @@ import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; import org.archive.wayback.util.AdaptedIterator; import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.bdb.BDBRecord; import org.archive.wayback.util.bdb.BDBRecordSet; @@ -204,7 +205,7 @@ } else if(op.compareTo("-w") == 0) { BufferedReader br = new BufferedReader( - new InputStreamReader(System.in)); + new InputStreamReader(System.in,ByteOp.UTF8)); RecordIterator itrS = new RecordIterator(br); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/bdb/BDBRecordToSearchResultAdapter.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -24,6 +24,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.CDXLineToSearchResultAdapter; import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.bdb.BDBRecord; /** @@ -50,18 +51,13 @@ */ public CaptureSearchResult adapt(BDBRecord record) { sb.setLength(0); - try { - String key = new String(record.getKey().getData(),"UTF-8"); - int urlEnd = key.indexOf(' '); - int dateSpecEnd = key.indexOf(' ',urlEnd + 1); - sb.append(key.substring(0,dateSpecEnd)); - sb.append(" "); - sb.append(new String(record.getValue().getData(),"UTF-8")); - sb.append(key.substring(dateSpecEnd)); - } catch (UnsupportedEncodingException e) { - // should not happen with UTF-8 hard-coded.. - e.printStackTrace(); - } + String key = new String(record.getKey().getData(),ByteOp.UTF8); + int urlEnd = key.indexOf(' '); + int dateSpecEnd = key.indexOf(' ',urlEnd + 1); + sb.append(key.substring(0,dateSpecEnd)); + sb.append(" "); + sb.append(new String(record.getValue().getData(),ByteOp.UTF8)); + sb.append(key.substring(dateSpecEnd)); return CDXLineToSearchResultAdapter.doAdapt(sb.toString()); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXFormatIndex.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -20,14 +20,17 @@ package org.archive.wayback.resourceindex.cdx; import java.io.BufferedReader; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.util.Iterator; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.cdx.format.CDXFormat; import org.archive.wayback.resourceindex.cdx.format.CDXFormatException; import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; public class CDXFormatIndex extends CDXIndex { @@ -44,7 +47,9 @@ try { // BUGBUG: I don't think java will let us do much better than // this... No way to stat() a filehandle, right? - BufferedReader fr = new BufferedReader(new FileReader(file)); + FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis,ByteOp.UTF8); + BufferedReader fr = new BufferedReader(isr); cdx = new CDXFormat(fr.readLine()); lastMod = nowMod; fr.close(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/updater/RemoteSubmitFilter.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -40,6 +40,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.archive.wayback.util.ByteOp; + /** * Filter that accepts PUT HTTP requests to insert CDX files into the incoming * directory for a local BDBIndex. @@ -152,7 +154,7 @@ InputStream input; input = request.getInputStream(); BufferedInputStream in = new BufferedInputStream(input); - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); + BufferedReader reader = new BufferedReader(new InputStreamReader(in,ByteOp.UTF8)); FileWriter out = new FileWriter(tmpFile); while ((i = reader.read()) != -1) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -27,6 +27,8 @@ import java.util.logging.Logger; import java.util.zip.GZIPInputStream; +import org.archive.wayback.util.ByteOp; + /** * @author brad * @@ -73,6 +75,6 @@ URLConnection uc = u.openConnection(); uc.setRequestProperty(RANGE_HEADER, sb.toString()); return new BufferedReader(new InputStreamReader( - new GZIPInputStream(uc.getInputStream()))); + new GZIPInputStream(uc.getInputStream()),ByteOp.UTF8)); } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesChunkIterator.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -31,6 +31,7 @@ import java.util.zip.GZIPInputStream; import org.archive.wayback.exception.RuntimeIOException; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; /** @@ -134,7 +135,7 @@ long offset = i * ZiplinedBlock.BLOCK_SIZE; raf.seek(offset); BufferedReader br = new BufferedReader(new InputStreamReader( - new GZIPInputStream(new FileInputStream(raf.getFD())))); + new GZIPInputStream(new FileInputStream(raf.getFD())),ByteOp.UTF8)); String line = br.readLine(); if(line == null) { System.err.println("Bad block at " + offset + " in " + args[0]); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/indexer/IndexQueueUpdater.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -21,13 +21,16 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.logging.Logger; import org.archive.wayback.Shutdownable; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.DirMaker; @@ -102,7 +105,9 @@ public long getLastMark() throws IOException { long mark = 0; if(file.isFile() && file.length() > 0) { - BufferedReader ir = new BufferedReader(new FileReader(file)); + FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis,ByteOp.UTF8); + BufferedReader ir = new BufferedReader(isr); String line = ir.readLine(); if(line != null) { mark = Long.parseLong(line); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -25,6 +25,7 @@ import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBLog; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.bdb.BDBRecordSet; @@ -277,7 +278,7 @@ db.setBdbName(bdbName); db.setLogPath(logPath); BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); + new InputStreamReader(System.in,ByteOp.UTF8)); String line; int exitCode = 0; try { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -34,6 +34,7 @@ import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.util.ParameterFormatter; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.WrappedCloseableIterator; @@ -265,7 +266,7 @@ if(operation.equalsIgnoreCase("add-stream")) { BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); + new InputStreamReader(System.in,ByteOp.UTF8)); String line; try { while((line = r.readLine()) != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -21,11 +21,14 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStreamReader; import java.io.RandomAccessFile; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.flatfile.RecordIterator; @@ -87,7 +90,9 @@ RandomAccessFile raf = new RandomAccessFile(this, "r"); raf.seek(start); - BufferedReader is = new BufferedReader(new FileReader(raf.getFD())); + FileInputStream fis = new FileInputStream(raf.getFD()); + InputStreamReader isr = new InputStreamReader(fis,ByteOp.UTF8); + BufferedReader is = new BufferedReader(isr); return new BufferedRangeIterator(new RecordIterator(is),end - start); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -24,6 +24,8 @@ import java.io.InputStreamReader; import java.net.URL; +import org.archive.wayback.util.ByteOp; + /** * * @@ -57,7 +59,7 @@ String url = "http://localhost:8080" + jsp + "?url=" + prefix; URL u = new URL(url); InputStream is = u.openStream(); - InputStreamReader isr = new InputStreamReader(is); + InputStreamReader isr = new InputStreamReader(is,ByteOp.UTF8); StringBuilder sb = new StringBuilder(2000); int READ_SIZE = 2048; char cbuf[] = new char[READ_SIZE]; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -27,7 +27,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.archive.wayback.util.ByteOp; + /** * * @@ -61,7 +63,7 @@ public static List<String> extractLinks(final String url) throws IOException { URL u = new URL(url); InputStream is = u.openStream(); - InputStreamReader isr = new InputStreamReader(is); + InputStreamReader isr = new InputStreamReader(is,ByteOp.UTF8); StringBuilder sb = new StringBuilder(2000); int READ_SIZE = 2048; char cbuf[] = new char[READ_SIZE]; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.charset.Charset; /** * Byte oriented static methods. Likely a lot of overlap with apache- commons @@ -33,6 +34,7 @@ public class ByteOp { /** Default buffer size for IO ops */ public final static int BUFFER_SIZE = 4096; + public final static Charset UTF8 = Charset.forName("utf-8"); /** * Create a new byte array with contents initialized to values from the Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/bdb/BDBRecordSet.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -24,7 +24,9 @@ import java.io.UnsupportedEncodingException; import java.util.Iterator; +import org.archive.wayback.util.ByteOp; + import com.sleepycat.je.Cursor; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; @@ -119,26 +121,14 @@ * @return byte array representation of String s in UTF-8 */ public static byte[] stringToBytes(String s) { - try { - return s.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - // no UTF-8, huh? - e.printStackTrace(); - return s.getBytes(); - } + return s.getBytes(ByteOp.UTF8); } /** * @param ba * @return String of UTF-8 encoded bytes ba */ public static String bytesToString(byte[] ba) { - try { - return new String(ba,"UTF-8"); - } catch (UnsupportedEncodingException e) { - // not likely.. - e.printStackTrace(); - return new String(ba); - } + return new String(ba,ByteOp.UTF8); } /** Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/flatfile/FlatFile.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -21,13 +21,16 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.RandomAccessFile; import java.util.Comparator; import java.util.Iterator; +import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.CompositeSortedIterator; @@ -180,7 +183,9 @@ RandomAccessFile raf = new RandomAccessFile(file,"r"); long offset = findKeyOffset(raf,prefix); lastMatchOffset = offset; - BufferedReader br = new BufferedReader(new FileReader(raf.getFD())); + FileInputStream is = new FileInputStream(raf.getFD()); + InputStreamReader isr = new InputStreamReader(is, ByteOp.UTF8); + BufferedReader br = new BufferedReader(isr); itr = new RecordIterator(br); return itr; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-05-25 01:44:09 UTC (rev 3454) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2011-05-25 01:46:53 UTC (rev 3455) @@ -31,6 +31,7 @@ import org.archive.net.UURI; import org.archive.net.UURIFactory; import org.archive.wayback.UrlCanonicalizer; +import org.archive.wayback.util.ByteOp; /** * Class that performs the standard Heritrix URL canonicalization. Eventually, @@ -365,7 +366,7 @@ for(int idx = 0; idx < columns.size(); idx++) { cols[idx] = columns.get(idx).intValue() - 1; } - BufferedReader r = new BufferedReader(new InputStreamReader(System.in)); + BufferedReader r = new BufferedReader(new InputStreamReader(System.in,ByteOp.UTF8)); StringBuilder sb = new StringBuilder(); String line = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |