From: <bra...@us...> - 2009-10-23 00:39:17
|
Revision: 2824 http://archive-access.svn.sourceforge.net/archive-access/?rev=2824&view=rev Author: bradtofel Date: 2009-10-23 00:39:07 +0000 (Fri, 23 Oct 2009) Log Message: ----------- FEATURE: Now can handle an extra robot-meta tag info field. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2009-10-23 00:37:11 UTC (rev 2823) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/CDXLineToSearchResultAdapter.java 2009-10-23 00:39:07 UTC (rev 2824) @@ -70,8 +70,13 @@ public static CaptureSearchResult doAdapt(String line) { CaptureSearchResult result = new CaptureSearchResult(); String[] tokens = line.split(" "); + boolean hasRobotFlags = false; if (tokens.length != 9) { - return null; + if(tokens.length == 10) { + hasRobotFlags = true; + } else { + return null; + } //throw new IllegalArgumentException("Need 9 columns("+line+")"); } String urlKey = tokens[0]; @@ -91,10 +96,17 @@ String digest = tokens[5]; String redirectUrl = tokens[6]; long compressedOffset = -1; - if(!tokens[7].equals("-")) { - compressedOffset = Long.parseLong(tokens[7]); + int nextToken = 7; + if(hasRobotFlags) { + result.setRobotFlags(tokens[nextToken]); + nextToken++; } - String fileName = tokens[8]; + + if(!tokens[nextToken].equals("-")) { + compressedOffset = Long.parseLong(tokens[nextToken]); + } + nextToken++; + String fileName = tokens[nextToken]; result.setUrlKey(urlKey); result.setCaptureTimestamp(captureTS); result.setOriginalUrl(originalUrl); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2009-10-23 00:37:11 UTC (rev 2823) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/SearchResultToCDXLineAdapter.java 2009-10-23 00:39:07 UTC (rev 2824) @@ -41,7 +41,14 @@ private static int DEFAULT_CAPACITY = 120; private final static String DELIMITER = " "; + private boolean outputRobot = false; + public boolean isOutputRobot() { + return outputRobot; + } + public void setIsOutputRobot(boolean isOutputRobot) { + this.outputRobot = isOutputRobot; + } /* (non-Javadoc) * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) */ @@ -63,15 +70,29 @@ sb.append(DELIMITER); sb.append(result.getRedirectUrl()); sb.append(DELIMITER); + if(outputRobot) { + String robotFlags = result.getRobotFlags(); + if(robotFlags == null || robotFlags.equals("")) { + robotFlags = "-"; + } + sb.append(robotFlags); + sb.append(DELIMITER); + } sb.append(result.getOffset()); sb.append(DELIMITER); sb.append(result.getFile()); return sb.toString(); } - public static Iterator<String> adapt(Iterator<CaptureSearchResult> input) { - return new AdaptedIterator<CaptureSearchResult,String>(input, - new SearchResultToCDXLineAdapter()); + return adapt(input,false); } + + public static Iterator<String> adapt(Iterator<CaptureSearchResult> input, + boolean isOutputRobot) { + SearchResultToCDXLineAdapter adapter = + new SearchResultToCDXLineAdapter(); + adapter.setIsOutputRobot(isOutputRobot); + return new AdaptedIterator<CaptureSearchResult,String>(input,adapter); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |