From: <bra...@us...> - 2011-04-19 22:41:32
|
Revision: 3437 http://archive-access.svn.sourceforge.net/archive-access/?rev=3437&view=rev Author: bradtofel Date: 2011-04-19 22:41:25 +0000 (Tue, 19 Apr 2011) Log Message: ----------- BUGFIX + 1.6.1 RELEASE CANDIDATE: Modified Paths: -------------- branches/wayback-1_6_1/dist/pom.xml branches/wayback-1_6_1/pom.xml branches/wayback-1_6_1/wayback-core/pom.xml branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/webapp/PortMapper.java branches/wayback-1_6_1/wayback-hadoop/pom.xml branches/wayback-1_6_1/wayback-hadoop-java/pom.xml branches/wayback-1_6_1/wayback-webapp/pom.xml Modified: branches/wayback-1_6_1/dist/pom.xml =================================================================== --- branches/wayback-1_6_1/dist/pom.xml 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/dist/pom.xml 2011-04-19 22:41:25 UTC (rev 3437) @@ -7,7 +7,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.6.0</version> + <version>1.6.1</version> </parent> <artifactId>dist</artifactId> Modified: branches/wayback-1_6_1/pom.xml =================================================================== --- branches/wayback-1_6_1/pom.xml 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/pom.xml 2011-04-19 22:41:25 UTC (rev 3437) @@ -7,7 +7,7 @@ <groupId>org.archive.wayback</groupId> <artifactId>wayback</artifactId> <packaging>pom</packaging> - <version>1.6.0</version> + <version>1.6.1</version> <name>Wayback</name> <modules> @@ -262,7 +262,7 @@ <dependency> <groupId>org.archive.heritrix</groupId> <artifactId>heritrix-commons</artifactId> - <version>3.1.1-SNAPSHOT</version> + <version>3.0.1-SNAPSHOT</version> </dependency> <dependency> <groupId>org.archive.access-control</groupId> Modified: branches/wayback-1_6_1/wayback-core/pom.xml =================================================================== --- branches/wayback-1_6_1/wayback-core/pom.xml 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/pom.xml 2011-04-19 22:41:25 UTC (rev 3437) @@ -8,7 +8,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.6.0</version> + <version>1.6.1</version> </parent> <artifactId>wayback-core</artifactId> Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -159,6 +159,15 @@ public String getTemplate() { return LIVE_WAYBACK_TEMPLATE; } + + public boolean getFrequentFlushes() { + // TODO Auto-generated method stub + return false; + } + + public int getWriteBufferSize() { + return 4096; + } }; } Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -28,6 +28,7 @@ import java.util.Set; import org.apache.commons.httpclient.Header; +import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; import org.archive.io.arc.ARCReader; import org.archive.io.arc.ARCRecord; @@ -54,7 +55,7 @@ * object for ARCReader -- need to hold on to this in order to call close() * to release filehandle after completing access to this record. optional */ - ARCReader arcReader = null; + ArchiveReader arcReader = null; /** * flag to indicate if the ARCRecord skipHTTPHeader() has been called */ @@ -71,7 +72,7 @@ * @param rec * @param reader */ - public ArcResource(final ARCRecord rec,final ARCReader reader) { + public ArcResource(final ARCRecord rec,final ArchiveReader reader) { super(); arcRecord = rec; arcReader = reader; Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -20,7 +20,10 @@ package org.archive.wayback.resourcestore.resourcefile; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; import java.net.URL; import org.archive.io.ArchiveReader; @@ -62,14 +65,19 @@ name = name.substring(0, name.length() - ArcWarcFilenameFilter.OPEN_SUFFIX.length()); } + RandomAccessFile raf = new RandomAccessFile(file, "r"); + raf.seek(offset); + InputStream is = new FileInputStream(raf.getFD()); + String fPath = file.getAbsolutePath(); if (isArc(name)) { - - ARCReader reader = ARCReaderFactory.get(file,offset); +// ARCReader reader = ARCReaderFactory.get(file,offset); + ArchiveReader reader = ARCReaderFactory.get(fPath,is,false); r = ARCArchiveRecordToResource(reader.get(),reader); } else if (isWarc(name)) { - WARCReader reader = WARCReaderFactory.get(file,offset); + ArchiveReader reader = WARCReaderFactory.get(fPath,is,false); +// WARCReader reader = WARCReaderFactory.get(file,offset); r = WARCArchiveRecordToResource(reader.get(),reader); } else { @@ -114,7 +122,7 @@ } public static Resource ARCArchiveRecordToResource(ArchiveRecord rec, - ARCReader reader) throws ResourceNotAvailableException, IOException { + ArchiveReader reader) throws ResourceNotAvailableException, IOException { if (!(rec instanceof ARCRecord)) { throw new ResourceNotAvailableException("Bad ARCRecord format"); @@ -125,7 +133,7 @@ } public static Resource WARCArchiveRecordToResource(ArchiveRecord rec, - WARCReader reader) throws ResourceNotAvailableException, IOException { + ArchiveReader reader) throws ResourceNotAvailableException, IOException { if (!(rec instanceof WARCRecord)) { throw new ResourceNotAvailableException("Bad WARCRecord format"); Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -27,6 +27,7 @@ import org.apache.commons.httpclient.HttpParser; import org.apache.commons.httpclient.StatusLine; import org.apache.commons.httpclient.util.EncodingUtil; +import org.archive.io.ArchiveReader; import org.archive.io.RecoverableIOException; import org.archive.io.arc.ARCConstants; import org.archive.io.warc.WARCReader; @@ -36,12 +37,12 @@ public class WarcResource extends Resource { private WARCRecord rec = null; - private WARCReader reader = null; + private ArchiveReader reader = null; private Map<String, String> headers = null; private long length = 0; private int status = 0; private boolean parsedHeaders = false; - public WarcResource(WARCRecord rec, WARCReader reader) { + public WarcResource(WARCRecord rec, ArchiveReader reader) { this.rec = rec; this.reader = reader; } Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/ARCCreator.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -26,9 +26,11 @@ import java.text.ParseException; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Logger; +import org.archive.io.WriterPoolSettings; import org.archive.io.arc.ARCConstants; import org.archive.io.arc.ARCWriter; import org.archive.util.ArchiveUtils; @@ -98,9 +100,12 @@ throws IOException { File target[] = {tgtDir}; + +// ARCWriter writer = new ARCWriter(new AtomicInteger(), +// Arrays.asList(target),prefix,true, +// ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE); ARCWriter writer = new ARCWriter(new AtomicInteger(), - Arrays.asList(target),prefix,true, - ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE); + getSettings(true,prefix,Arrays.asList(target))); File sources[] = srcDir.listFiles(); LOGGER.info("Found " + sources.length + " files in " + srcDir); for(int i = 0; i<sources.length; i++) { @@ -121,6 +126,43 @@ LOGGER.info("Closed arc file named " + writer.getFile().getAbsolutePath()); } + private WriterPoolSettings getSettings(final boolean isCompressed, + final String prefix, final List<File> arcDirs) { + return new WriterPoolSettings() { + public List<File> getOutputDirs() { + return arcDirs; + } + + @SuppressWarnings({ "unchecked", "rawtypes" }) + public List getMetadata() { + return null; + } + + public String getPrefix() { + return prefix; + } + + public boolean getCompress() { + return isCompressed; + } + + public long getMaxFileSizeBytes() { + return ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE; + } + + public String getTemplate() { + return "${prefix}-${timestamp17}-${serialno}"; + } + + public boolean getFrequentFlushes() { + return false; + } + + public int getWriteBufferSize() { + return 4096; + } + }; + } /** * @param args Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/WARCHeader.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -26,8 +26,14 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import org.archive.io.WriterPoolSettings; +import org.archive.io.arc.ARCConstants; import org.archive.io.warc.WARCWriter; +import org.archive.io.warc.WARCWriterPoolSettings; +import org.archive.uid.RecordIDGenerator; +import org.archive.uid.UUIDGenerator; import org.archive.util.anvl.ANVLRecord; public class WARCHeader { @@ -45,8 +51,9 @@ List<String> metadata = new ArrayList<String>(1); metadata.add(ar.toString()); - writer = new WARCWriter(null, bos, target, true, null, - metadata); +// writer = new WARCWriter(new AtomicInteger(),null, bos, target, true, null, +// metadata); + writer = new WARCWriter(new AtomicInteger(),bos,target,getSettings(true, null, null, metadata)); // Write a warcinfo record with description about how this WARC // was made. writer.writeWarcinfoRecord(target.getName(), "Made from " @@ -54,7 +61,48 @@ + this.getClass().getName()); } + private WARCWriterPoolSettings getSettings(final boolean isCompressed, + final String prefix, final List<File> arcDirs, final List metadata) { + return new WARCWriterPoolSettings() { + public List<File> getOutputDirs() { + return arcDirs; + } + @SuppressWarnings({ "unchecked", "rawtypes" }) + public List getMetadata() { + return metadata; + } + + public String getPrefix() { + return prefix; + } + + public boolean getCompress() { + return isCompressed; + } + + public long getMaxFileSizeBytes() { + return ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE; + } + + public String getTemplate() { + return "${prefix}-${timestamp17}-${serialno}"; + } + + public boolean getFrequentFlushes() { + return false; + } + + public int getWriteBufferSize() { + return 4096; + } + + public RecordIDGenerator getRecordIDGenerator() { + return new UUIDGenerator(); + } + }; + } + public static void main(String[] args) { if (args.length != 3) { System.err.println("USAGE: tgtWarc fieldsSrc id"); Modified: branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/webapp/PortMapper.java =================================================================== --- branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/webapp/PortMapper.java 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-core/src/main/java/org/archive/wayback/util/webapp/PortMapper.java 2011-04-19 22:41:25 UTC (rev 3437) @@ -130,9 +130,9 @@ String host = requestToHost(request); String contextPath = request.getContextPath(); StringBuilder pathPrefix = new StringBuilder(contextPath); - if(contextPath.length() == 0) { +// if(contextPath.length() == 0) { pathPrefix.append("/"); - } +// } String firstPath = requestToFirstPath(request); RequestHandler handler = pathMap.get(hostPathToKey(host,firstPath)); if(handler != null) { Modified: branches/wayback-1_6_1/wayback-hadoop/pom.xml =================================================================== --- branches/wayback-1_6_1/wayback-hadoop/pom.xml 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-hadoop/pom.xml 2011-04-19 22:41:25 UTC (rev 3437) @@ -8,7 +8,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.6.0</version> + <version>1.6.1</version> </parent> <artifactId>wayback-hadoop</artifactId> Modified: branches/wayback-1_6_1/wayback-hadoop-java/pom.xml =================================================================== --- branches/wayback-1_6_1/wayback-hadoop-java/pom.xml 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-hadoop-java/pom.xml 2011-04-19 22:41:25 UTC (rev 3437) @@ -8,7 +8,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.6.0</version> + <version>1.6.1</version> </parent> <artifactId>wayback-hadoop-java</artifactId> Modified: branches/wayback-1_6_1/wayback-webapp/pom.xml =================================================================== --- branches/wayback-1_6_1/wayback-webapp/pom.xml 2011-04-16 17:37:26 UTC (rev 3436) +++ branches/wayback-1_6_1/wayback-webapp/pom.xml 2011-04-19 22:41:25 UTC (rev 3437) @@ -7,7 +7,7 @@ <parent> <artifactId>wayback</artifactId> <groupId>org.archive.wayback</groupId> - <version>1.6.0</version> + <version>1.6.1</version> </parent> <artifactId>wayback-webapp</artifactId> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |