From: <bra...@us...> - 2011-12-18 03:52:11
|
Revision: 3583 http://archive-access.svn.sourceforge.net/archive-access/?rev=3583&view=rev Author: bradtofel Date: 2011-12-18 03:52:05 +0000 (Sun, 18 Dec 2011) Log Message: ----------- FEATURE: now allows 11 fields - including the compressed length field.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java 2011-12-18 03:51:19 UTC (rev 3582) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFlexFormat.java 2011-12-18 03:52:05 UTC (rev 3583) @@ -42,9 +42,12 @@ CaptureSearchResult result = new CaptureSearchResult(); String[] tokens = line.split(" "); boolean hasRobotFlags = false; + boolean hasCompressedLength = false; if (tokens.length != 9) { if(tokens.length == 10) { hasRobotFlags = true; + } else if(tokens.length == 11) { + hasCompressedLength = true; } else { return null; } @@ -72,10 +75,18 @@ result.setRobotFlags(tokens[nextToken]); nextToken++; } + String length = "-"; + if(hasCompressedLength) { + length = tokens[nextToken]; + nextToken++; + } if(!tokens[nextToken].equals("-")) { try { compressedOffset = Long.parseLong(tokens[nextToken]); + if(!length.equals("-")) { + result.setCompressedLength(compressedOffset + Long.parseLong(length)); + } } catch (NumberFormatException e) { LOGGER.warning("Bad compressed Offset field("+nextToken+") in (" + line +")"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java 2011-12-18 03:51:19 UTC (rev 3582) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java 2011-12-18 03:52:05 UTC (rev 3583) @@ -147,8 +147,8 @@ case 'r': field = new RedirectURLCDXField(); break; case 'M': field = new RobotFlagsCDXField(); break; case 'V': field = new StartOffsetCDXField(); break; - // NOT IMPLEMENTED in ARC/WARCReaders... -// case 'n': field = new EndOffsetCDXField(); break; + // Experimental.. + case 'S': field = new CompressedLengthCDXField(); break; case 'g': field = new FilenameCDXField(); break; } if(field == null) { Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java (from rev 3539, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CompressedLengthCDXField.java 2011-12-18 03:52:05 UTC (rev 3583) @@ -0,0 +1,42 @@ +/* + * This file is part of the Wayback archival access software + * (http://archive-access.sourceforge.net/projects/wayback/). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class CompressedLengthCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + try { + result.setCompressedLength(Long.parseLong(field)); + } catch(NumberFormatException e) { + throw new CDXFormatException(e.getLocalizedMessage()); + } + } + + public String serialize(CaptureSearchResult result) { + long r = result.getCompressedLength(); + if(r == -1) { + return DEFAULT_VALUE; + } + return String.valueOf(r); + } +} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java 2011-12-18 03:51:19 UTC (rev 3582) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java 2011-12-18 03:52:05 UTC (rev 3583) @@ -1,42 +0,0 @@ -/* - * This file is part of the Wayback archival access software - * (http://archive-access.sourceforge.net/projects/wayback/). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.wayback.resourceindex.cdx.format; - -import org.archive.wayback.core.CaptureSearchResult; - -public class EndOffsetCDXField implements CDXField { - - public void apply(String field, CaptureSearchResult result) - throws CDXFormatException { - try { - result.setEndOffset(Long.parseLong(field)); - } catch(NumberFormatException e) { - throw new CDXFormatException(e.getLocalizedMessage()); - } - } - - public String serialize(CaptureSearchResult result) { - long r = result.getEndOffset(); - if(r == -1) { - return DEFAULT_VALUE; - } - return String.valueOf(r); - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |