From: <bra...@us...> - 2008-08-19 02:49:46
|
Revision: 2562 http://archive-access.svn.sourceforge.net/archive-access/?rev=2562&view=rev Author: bradtofel Date: 2008-08-19 02:49:55 +0000 (Tue, 19 Aug 2008) Log Message: ----------- REVERTED: changes to "url" and "capturetimestamp" - was causing problems with WAXToolbar. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2008-08-19 02:49:03 UTC (rev 2561) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2008-08-19 02:49:55 UTC (rev 2562) @@ -39,7 +39,7 @@ private long cachedOffset = -1; private long cachedDate = -1; - public static final String CAPTURE_ORIGINAL_URL = "originalurl"; + public static final String CAPTURE_ORIGINAL_URL = "url"; /** * Result: canonicalized(lookup key) form of URL of captured document @@ -49,7 +49,7 @@ /** * Result: 14-digit timestamp when document was captured */ - public static final String CAPTURE_CAPTURE_TIMESTAMP = "capturetimestamp"; + public static final String CAPTURE_CAPTURE_TIMESTAMP = "capturedate"; /** * Result: basename of ARC file containing this document. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2009-10-15 22:44:24
|
Revision: 2808 http://archive-access.svn.sourceforge.net/archive-access/?rev=2808&view=rev Author: bradtofel Date: 2009-10-15 22:44:10 +0000 (Thu, 15 Oct 2009) Log Message: ----------- FEATURE: added endOffset and robotFlags accessors. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2009-10-15 22:33:53 UTC (rev 2807) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2009-10-15 22:44:10 UTC (rev 2808) @@ -37,9 +37,11 @@ public class CaptureSearchResult extends SearchResult { private long cachedOffset = -1; + private long cachedEndOffset = -1; private long cachedDate = -1; public static final String CAPTURE_ORIGINAL_URL = "url"; + public static final String CAPTURE_ORIGINAL_HOST = "host"; /** * Result: canonicalized(lookup key) form of URL of captured document @@ -52,19 +54,19 @@ public static final String CAPTURE_CAPTURE_TIMESTAMP = "capturedate"; /** - * Result: basename of ARC file containing this document. + * Result: basename of ARC/WARC file containing this document. */ public static final String CAPTURE_FILE = "file"; /** - * Result: compressed byte offset within ARC file where this document's + * Result: compressed byte offset within ARC/WARC file where this document's * gzip envelope begins. */ public static final String CAPTURE_OFFSET = "compressedoffset"; /** - * Result: compressed byte offset within ARC file where this document's - * gzip envelope Ends. + * Result: compressed byte offset within ARC/WARC file where this document's + * gzip envelope Ends. */ public static final String CAPTURE_END_OFFSET = "compressedendoffset"; @@ -93,6 +95,20 @@ public static final String CAPTURE_REDIRECT_URL = "redirecturl"; /** + * Result: String flags which indicate robot instructions found in an HTML + * page. Currently one or more of: + * <li>"A" - noarchive</li> + * <li>"F" - nofollow</li> + * <li>"I" - noindex</li> + * @see http://noarchive.net/ + */ + public static final String CAPTURE_ROBOT_FLAGS = "robotflags"; + + public static final String CAPTURE_ROBOT_NOARCHIVE = "A"; + public static final String CAPTURE_ROBOT_NOFOLLOW = "F"; + public static final String CAPTURE_ROBOT_NOINDEX = "I"; + + /** * Result: flag within a SearchResult that indicates this is the closest to * a particular requested date. */ @@ -127,14 +143,36 @@ */ public static final String CAPTURE_DUPLICATE_HTTP = "http"; public String getOriginalUrl() { - return get(CAPTURE_ORIGINAL_URL); + String url = get(CAPTURE_ORIGINAL_URL); + if(url == null) { + // convert from ORIG_HOST to ORIG_URL here: + url = getUrlKey(); + String host = get(CAPTURE_ORIGINAL_HOST); + if(url != null && host != null) { + StringBuilder sb = new StringBuilder(url.length()); + sb.append(UrlOperations.DEFAULT_SCHEME); + sb.append(host); + sb.append(UrlOperations.getURLPath(url)); + url = sb.toString(); + // cache it for next time...? + setOriginalUrl(url); + } + } + return url; } public void setOriginalUrl(String originalUrl) { put(CAPTURE_ORIGINAL_URL,originalUrl); } public String getOriginalHost() { - return UrlOperations.urlToHost(getOriginalUrl()); + String host = get(CAPTURE_ORIGINAL_HOST); + if(host == null) { + host = UrlOperations.urlToHost(getOriginalUrl()); + } + return host; } + public void setOriginalHost(String originalHost) { + put(CAPTURE_ORIGINAL_HOST,originalHost); + } public String getUrlKey() { return get(CAPTURE_URL_KEY); } @@ -173,6 +211,16 @@ cachedOffset = offset; put(CAPTURE_OFFSET,String.valueOf(offset)); } + public long getEndOffset() { + if(cachedEndOffset == -1) { + cachedEndOffset = Long.parseLong(get(CAPTURE_END_OFFSET)); + } + return cachedEndOffset; + } + public void setEndOffset(long offset) { + cachedEndOffset = offset; + put(CAPTURE_END_OFFSET,String.valueOf(offset)); + } public String getMimeType() { return get(CAPTURE_MIME_TYPE); } @@ -253,4 +301,46 @@ } return null; } + public String getRobotFlags() { + return get(CAPTURE_ROBOT_FLAGS); + } + public void setRobotFlags(String robotFlags) { + put(CAPTURE_ROBOT_FLAGS,robotFlags); + } + public void setRobotFlag(String flag) { + String flags = get(CAPTURE_ROBOT_FLAGS); + if(flags == null) { + flags = ""; + } + if(!flags.contains(flag)) { + flags = flags + flag; + } + put(CAPTURE_ROBOT_FLAGS,flags); + } + public boolean isRobotFlagSet(String flag) { + String flags = get(CAPTURE_ROBOT_FLAGS); + if(flags == null) { + return false; + } + return flags.contains(flag); + } + + public boolean isRobotNoArchive() { + return isRobotFlagSet(CAPTURE_ROBOT_NOARCHIVE); + } + public boolean isRobotNoIndex() { + return isRobotFlagSet(CAPTURE_ROBOT_NOINDEX); + } + public boolean isRobotNoFollow() { + return isRobotFlagSet(CAPTURE_ROBOT_NOFOLLOW); + } + public void setRobotNoArchive() { + setRobotFlag(CAPTURE_ROBOT_NOARCHIVE); + } + public void setRobotNoIndex() { + setRobotFlag(CAPTURE_ROBOT_NOARCHIVE); + } + public void setRobotNoFollow() { + setRobotFlag(CAPTURE_ROBOT_NOARCHIVE); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2009-10-28 00:08:09
|
Revision: 2850 http://archive-access.svn.sourceforge.net/archive-access/?rev=2850&view=rev Author: bradtofel Date: 2009-10-28 00:08:00 +0000 (Wed, 28 Oct 2009) Log Message: ----------- BUGFIX(unreported): test for null before attempting to decode String into long Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2009-10-28 00:06:30 UTC (rev 2849) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2009-10-28 00:08:00 UTC (rev 2850) @@ -213,7 +213,8 @@ } public long getEndOffset() { if(cachedEndOffset == -1) { - cachedEndOffset = Long.parseLong(get(CAPTURE_END_OFFSET)); + String tmp = get(CAPTURE_END_OFFSET); + cachedEndOffset = tmp == null ? -1 : Long.parseLong(tmp); } return cachedEndOffset; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-12-18 03:49:05
|
Revision: 3580 http://archive-access.svn.sourceforge.net/archive-access/?rev=3580&view=rev Author: bradtofel Date: 2011-12-18 03:48:59 +0000 (Sun, 18 Dec 2011) Log Message: ----------- Changed EndOffset to CompressedLength Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2011-12-18 01:00:02 UTC (rev 3579) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResult.java 2011-12-18 03:48:59 UTC (rev 3580) @@ -32,7 +32,7 @@ public class CaptureSearchResult extends SearchResult { private long cachedOffset = -1; - private long cachedEndOffset = -1; + private long cachedCompressedLength = -1; private long cachedDate = -1; public static final String CAPTURE_ORIGINAL_URL = "url"; @@ -63,7 +63,7 @@ * Result: compressed byte offset within ARC/WARC file where this document's * gzip envelope Ends. */ - public static final String CAPTURE_END_OFFSET = "compressedendoffset"; + public static final String CAPTURE_COMPRESSED_LENGTH = "compressedendoffset"; /** * Result: best-guess at mime-type of this document. @@ -215,16 +215,16 @@ cachedOffset = offset; put(CAPTURE_OFFSET,String.valueOf(offset)); } - public long getEndOffset() { - if(cachedEndOffset == -1) { - String tmp = get(CAPTURE_END_OFFSET); - cachedEndOffset = tmp == null ? -1 : Long.parseLong(tmp); + public long getCompressedLength() { + if(cachedCompressedLength == -1) { + String tmp = get(CAPTURE_COMPRESSED_LENGTH); + cachedCompressedLength = tmp == null ? -1 : Long.parseLong(tmp); } - return cachedEndOffset; + return cachedCompressedLength; } - public void setEndOffset(long offset) { - cachedEndOffset = offset; - put(CAPTURE_END_OFFSET,String.valueOf(offset)); + public void setCompressedLength(long offset) { + cachedCompressedLength = offset; + put(CAPTURE_COMPRESSED_LENGTH,String.valueOf(offset)); } public String getMimeType() { return get(CAPTURE_MIME_TYPE); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |