From: <bra...@us...> - 2009-11-06 00:04:02
|
Revision: 2883 http://archive-access.svn.sourceforge.net/archive-access/?rev=2883&view=rev Author: bradtofel Date: 2009-11-06 00:03:54 +0000 (Fri, 06 Nov 2009) Log Message: ----------- INITIAL REV: classes which enable indirection in serialization of CaptureSearchResults Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormatException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/DigestCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/FilenameCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/HTTPCodeCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/MIMETypeCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/OriginalURLCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RedirectURLCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RobotFlagsCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/StartOffsetCDXField.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/TimestampCDXField.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/URLKeyCDXField.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,34 @@ +/* CDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public interface CDXField { + public static String DEFAULT_VALUE = "-"; + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException; + public String serialize(CaptureSearchResult result); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,200 @@ +/* CDXFormat + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +/** + * Class which allows serialization/deserialization of CaptureSearchResult + * objects into/out of a single line String representation. + * + * + * @author brad + * + */ +public class CDXFormat { + + /* + * A canonized url + * B news group + * C rulespace category *** + * D compressed dat file offset + * F canonized frame + * G multi-columm language description (* soon) + * H canonized host + * I canonized image + * J canonized jump point + * K Some weird FBIS what's changed kinda thing + * L canonized link + * M meta tags (AIF) * + * N massaged url + * P canonized path + * Q language string + * R canonized redirect + * U uniqness *** + * V compressed arc file offset * + * X canonized url in other href tages + * Y canonized url in other src tags + * Z canonized url found in script + * a original url ** + * b date ** + * c old style checksum * + * d uncompressed dat file offset + * e IP ** + * f frame * + * g file name + * h original host + * i image * + * j original jump point + * k new style checksum * + * l link * + * m mime type of original document * + * n arc document length * + * o port + * p original path + * r redirect * + * s response code * + * t title * + * v uncompressed arc file offset * + * x url in other href tages * + * y url in other src tags * + * z url found in script * + * # comment + * + * * in alexa-made dat file + * ** in alexa-made dat file meta-data line + * *** future data + */ + + private CDXField[] fields = null; + private char delimiter = ' '; + private String delimiterS = null; + + public static String CDX_MAGIC = " CDX"; + + public static char URL_KEY = 'A'; + public static char TIMESTAMP = 'b'; + public 
static char ORIGINAL_URL = 'a'; + public static char MIME_TYPE = 'm'; + public static char HTTP_CODE = 's'; + public static char DIGEST = 'k'; + public static char REDIRECT = 'r'; + public static char ROBOT_FLAGS = 'M'; + public static char COMPRESSED_OFFSET = 'V'; + public static char COMPRESSED_LENGTH = 'n'; + public static char FILE = 'g'; + + /** + * Construct a CDXFormat reader/writer based on the specification argument + * @param cdxSpec + * @throws CDXFormatException + */ + public CDXFormat(String cdxSpec) throws CDXFormatException { + if(!cdxSpec.startsWith(CDX_MAGIC)) { + throw new CDXFormatException("Spec '" + cdxSpec + + "' does not start with '" + CDX_MAGIC + "'"); + } + delimiter = cdxSpec.charAt(CDX_MAGIC.length()); + String fieldsString = cdxSpec.substring(CDX_MAGIC.length()+1); + int fieldCount = (fieldsString.length() + 1) / 2; + if(fieldsString.length() != (fieldCount * 2) - 1) { + throw new CDXFormatException("Extra char after spec '" + + cdxSpec + "'"); + } + fields = new CDXField[fieldCount]; + for(int i = 0; i < fieldCount; i++) { + char f = fieldsString.charAt(i * 2); + if(i < fieldCount - 1) { + char d = fieldsString.charAt((i*2)+1); + if(d != delimiter) { + throw new CDXFormatException("Non-delimiter char in '" + + fieldsString + "'"); + } + } + fields[i] = getField(f); + } + delimiterS = new String(""+delimiter); + } + + private CDXField getField(char fieldChar) throws CDXFormatException { + CDXField field = null; + switch (fieldChar) { + case 'A': field = new URLKeyCDXField(); break; + // backvards compat with Alexa tools: + case 'N': field = new URLKeyCDXField(); break; + case 'b': field = new TimestampCDXField(); break; + case 'a': field = new OriginalURLCDXField(); break; + case 'm': field = new MIMETypeCDXField(); break; + case 's': field = new HTTPCodeCDXField(); break; + case 'k': field = new DigestCDXField(); break; + case 'r': field = new RedirectURLCDXField(); break; + case 'M': field = new RobotFlagsCDXField(); break; + case 
'V': field = new StartOffsetCDXField(); break; + // NOT IMPLEMENTED in ARC/WARCReaders... +// case 'n': field = new EndOffsetCDXField(); break; + case 'g': field = new FilenameCDXField(); break; + } + if(field == null) { + throw new CDXFormatException("Unknown field '"+fieldChar+"'"); + } + return field; + } + + /** + * @param line + * @return CaptureSearchResult containing data from the 'line' argument + * parsed according the the specification for this CDXFormat + * @throws CDXFormatException + */ + public CaptureSearchResult parseResult(String line) + throws CDXFormatException { + CaptureSearchResult result = new CaptureSearchResult(); + String[] parts = line.split(delimiterS); + + if(parts.length != fields.length) { + throw new CDXFormatException("Wrong number of fields"); + } + for(int i = 0; i < fields.length; i++) { + fields[i].apply(parts[i], result); + } + return result; + } + + /** + * @param result + * @return String representation of the data in 'result' formatted according + * to the specification for this CDXFormat + */ + public String serializeResult(CaptureSearchResult result) { + StringBuilder sb = new StringBuilder(100); + for(int i = 0; i < fields.length; i++) { + sb.append(fields[i].serialize(result)); + if(i < fields.length - 1) { + sb.append(delimiter); + } + } + return sb.toString(); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormat.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormatException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/CDXFormatException.java (rev 0) +++ 
/**
 * Checked exception signalling that a CDX specification or CDX line could not
 * be interpreted.
 */
public class CDXFormatException extends Exception {

	private static final long serialVersionUID = 1L;

	/**
	 * @param string description of the problem
	 */
	public CDXFormatException(String string) {
		super(string);
	}

	/**
	 * Variant that keeps the underlying failure, so the original stack trace
	 * is not lost when a lower-level exception is translated.
	 *
	 * @param message description of the problem
	 * @param cause exception that triggered this one, may be null
	 */
	public CDXFormatException(String message, Throwable cause) {
		super(message, cause);
	}
}
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/DigestCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* DigestCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class DigestCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setDigest(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getDigest(); + return r == null ? 
DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/DigestCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,47 @@ +/* EndOffsetCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class EndOffsetCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + try { + result.setEndOffset(Long.parseLong(field)); + } catch(NumberFormatException e) { + throw new CDXFormatException(e.getLocalizedMessage()); + } + } + + public String serialize(CaptureSearchResult result) { + long r = result.getEndOffset(); + if(r == -1) { + return DEFAULT_VALUE; + } + return String.valueOf(r); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/EndOffsetCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/FilenameCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/FilenameCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/FilenameCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* FilenameCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. 
+ * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class FilenameCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setFile(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getFile(); + return r == null ? 
DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/FilenameCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/HTTPCodeCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/HTTPCodeCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/HTTPCodeCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* HTTPCodeCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class HTTPCodeCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setHttpCode(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getHttpCode(); + return r == null ? DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/HTTPCodeCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/MIMETypeCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/MIMETypeCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/MIMETypeCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* MIMETypeCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class MIMETypeCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setMimeType(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getMimeType(); + return r == null ? DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/MIMETypeCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/OriginalURLCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/OriginalURLCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/OriginalURLCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* OriginalURLCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. 
+ * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class OriginalURLCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setOriginalUrl(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getOriginalUrl(); + return r == null ? 
DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/OriginalURLCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RedirectURLCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RedirectURLCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RedirectURLCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* RedirectURLCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class RedirectURLCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setRedirectUrl(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getRedirectUrl(); + return r == null ? DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RedirectURLCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RobotFlagsCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RobotFlagsCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RobotFlagsCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* RobotFlagsCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class RobotFlagsCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setRobotFlags(field); + } + + public String serialize(CaptureSearchResult result) { + String r = result.getRobotFlags(); + return r == null ? DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/RobotFlagsCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/StartOffsetCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/StartOffsetCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/StartOffsetCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,46 @@ +/* StartOffsetCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. 
+ * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +public class StartOffsetCDXField implements CDXField { + + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + try { + result.setOffset(Long.parseLong(field)); + } catch(NumberFormatException e) { + throw new CDXFormatException(e.getLocalizedMessage()); + } + } + public String serialize(CaptureSearchResult result) { + long r = result.getOffset(); + if(r == -1) { + return DEFAULT_VALUE; + } + return String.valueOf(r); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/StartOffsetCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/TimestampCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/TimestampCDXField.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/TimestampCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* TimestampCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +/** + * CDXField implementation for the capture timestamp of a CaptureSearchResult: + * apply() stores the field through setCaptureTimestamp() and serialize() reads + * it back through getCaptureTimestamp(). + */ +public class TimestampCDXField implements CDXField { + + /** + * Stores the field text on the result as its capture timestamp; the text + * is passed through as a String and its format is not checked here. + * + * @param field text of this field as supplied by the caller + * @param result capture record to update + * @throws CDXFormatException declared by the signature; nothing in this + * body throws it directly + */ + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setCaptureTimestamp(field); + } + + /** + * Returns the capture timestamp of the result as text. + * + * @param result capture record to read + * @return the stored timestamp string, or DEFAULT_VALUE when it is null + * (DEFAULT_VALUE is not declared in this class; presumably a + * constant inherited from CDXField - confirm) + */ + public String serialize(CaptureSearchResult result) { + String r = result.getCaptureTimestamp(); + return r == null ?
DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/TimestampCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/URLKeyCDXField.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/URLKeyCDXField.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/URLKeyCDXField.java 2009-11-06 00:03:54 UTC (rev 2883) @@ -0,0 +1,40 @@ +/* URLKeyCDXField + * + * $Id$ + * + * Created on 4:00:41 PM Apr 13, 2009. + * + * Copyright (C) 2009 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details.
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.cdx.format; + +import org.archive.wayback.core.CaptureSearchResult; + +/** + * CDXField implementation for the URL key of a CaptureSearchResult: apply() + * stores the field through setUrlKey() and serialize() reads it back through + * getUrlKey(). + */ +public class URLKeyCDXField implements CDXField { + + /** + * Stores the field text on the result as its URL key, with no parsing or + * validation performed in this method. + * + * @param field text of this field as supplied by the caller + * @param result capture record to update + * @throws CDXFormatException declared by the signature; nothing in this + * body throws it directly + */ + public void apply(String field, CaptureSearchResult result) + throws CDXFormatException { + result.setUrlKey(field); + } + + /** + * Returns the URL key of the result as text. + * + * @param result capture record to read + * @return the stored URL key, or DEFAULT_VALUE when it is null + * (DEFAULT_VALUE is not declared in this class; presumably a + * constant inherited from CDXField - confirm) + */ + public String serialize(CaptureSearchResult result) { + String r = result.getUrlKey(); + return r == null ? DEFAULT_VALUE : r; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/cdx/format/URLKeyCDXField.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |