|
From: <bra...@us...> - 2011-12-18 03:52:11
|
Revision: 3583
http://archive-access.svn.sourceforge.net/archive-access/?rev=3583&view=rev
Author: bradtofel
Date: 2011-12-18 03:52:05 +0000 (Sun, 18 Dec 2011)
Log Message:
-----------
FEATURE: now allows 11 fields, including the compressed length field.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java
Added Paths:
-----------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java
Removed Paths:
-------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java 2011-12-18 03:51:19 UTC (rev 3582)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java 2011-12-18 03:52:05 UTC (rev 3583)
@@ -42,9 +42,12 @@
CaptureSearchResult result = new CaptureSearchResult();
String[] tokens = line.split(" ");
boolean hasRobotFlags = false;
+ boolean hasCompressedLength = false;
if (tokens.length != 9) {
if(tokens.length == 10) {
hasRobotFlags = true;
+ } else if(tokens.length == 11) {
+ hasCompressedLength = true;
} else {
return null;
}
@@ -72,10 +75,18 @@
result.setRobotFlags(tokens[nextToken]);
nextToken++;
}
+ String length = "-";
+ if(hasCompressedLength) {
+ length = tokens[nextToken];
+ nextToken++;
+ }
if(!tokens[nextToken].equals("-")) {
try {
compressedOffset = Long.parseLong(tokens[nextToken]);
+ if(!length.equals("-")) {
+ result.setCompressedLength(compressedOffset + Long.parseLong(length));
+ }
} catch (NumberFormatException e) {
LOGGER.warning("Bad compressed Offset field("+nextToken+") in (" +
line +")");
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java 2011-12-18 03:51:19 UTC (rev 3582)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java 2011-12-18 03:52:05 UTC (rev 3583)
@@ -147,8 +147,8 @@
case 'r': field = new RedirectURLCDXField(); break;
case 'M': field = new RobotFlagsCDXField(); break;
case 'V': field = new StartOffsetCDXField(); break;
- // NOT IMPLEMENTED in ARC/WARCReaders...
-// case 'n': field = new EndOffsetCDXField(); break;
+ // Experimental..
+ case 'S': field = new CompressedLengthCDXField(); break;
case 'g': field = new FilenameCDXField(); break;
}
if(field == null) {
Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java (from rev 3539, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java)
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java 2011-12-18 03:52:05 UTC (rev 3583)
@@ -0,0 +1,42 @@
+/*
+ * This file is part of the Wayback archival access software
+ * (http://archive-access.sourceforge.net/projects/wayback/).
+ *
+ * Licensed to the Internet Archive (IA) by one or more individual
+ * contributors.
+ *
+ * The IA licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.archive.wayback.resourceindex.cdx.format;
+
+import org.archive.wayback.core.CaptureSearchResult;
+
+public class CompressedLengthCDXField implements CDXField { // CDX field that reads/writes the compressed-length value of a CaptureSearchResult
+
+ public void apply(String field, CaptureSearchResult result) // parse one CDX column value and store it on result
+ throws CDXFormatException {
+ try {
+ result.setCompressedLength(Long.parseLong(field)); // any text Long.parseLong rejects raises NumberFormatException
+ } catch(NumberFormatException e) {
+ throw new CDXFormatException(e.getLocalizedMessage()); // only the message is passed on; e is not chained as the cause
+ }
+ }
+
+ public String serialize(CaptureSearchResult result) { // render the stored compressed length as CDX column text
+ long r = result.getCompressedLength();
+ if(r == -1) { // -1 is written out as DEFAULT_VALUE rather than as a number
+ return DEFAULT_VALUE; // not declared in this class; presumably inherited from CDXField -- confirm
+ }
+ return String.valueOf(r); // otherwise the plain decimal form of the long
+ }
+}
Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java 2011-12-18 03:51:19 UTC (rev 3582)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java 2011-12-18 03:52:05 UTC (rev 3583)
@@ -1,42 +0,0 @@
-/*
- * This file is part of the Wayback archival access software
- * (http://archive-access.sourceforge.net/projects/wayback/).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.archive.wayback.resourceindex.cdx.format;
-
-import org.archive.wayback.core.CaptureSearchResult;
-
-public class EndOffsetCDXField implements CDXField {
-
- public void apply(String field, CaptureSearchResult result)
- throws CDXFormatException {
- try {
- result.setEndOffset(Long.parseLong(field));
- } catch(NumberFormatException e) {
- throw new CDXFormatException(e.getLocalizedMessage());
- }
- }
-
- public String serialize(CaptureSearchResult result) {
- long r = result.getEndOffset();
- if(r == -1) {
- return DEFAULT_VALUE;
- }
- return String.valueOf(r);
- }
-}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|