You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
From: Brad <bra...@us...> - 2005-10-19 01:22:47
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/rawreplayui In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/rawreplayui Modified Files: RawReplayUI.java Log Message: lots of javadoc comments Index: RawReplayUI.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/rawreplayui/RawReplayUI.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RawReplayUI.java 18 Oct 2005 02:30:50 -0000 1.1 --- RawReplayUI.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* RawReplayUI + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.rawreplayui; *************** *** 31,34 **** --- 54,64 ---- import org.archive.wayback.exception.WaybackException; + /** + * Simple implementation of ReplayUI, providing basic error messages, redirect + * to closest match, and unmodified HTTP access to ARC Records. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class RawReplayUI implements ReplayUI, RequestParser { private final static String JSP_PATH = "replayui.jsppath"; *************** *** 39,45 **** private String jspPath = null; public RawReplayUI() { super(); - // TODO Auto-generated constructor stub } --- 69,77 ---- private String jspPath = null; + /** + * Constructor + */ public RawReplayUI() { super(); } *************** *** 96,100 **** return wmRequest; } catch (URIException e) { ! // TODO Auto-generated catch block e.printStackTrace(); } --- 128,132 ---- return wmRequest; } catch (URIException e) { ! e.printStackTrace(); } *************** *** 129,133 **** HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! // TODO Auto-generated method stub ResourceResults results; ResourceIndex idx = wayback.getResourceIndex(); --- 161,165 ---- HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! ResourceResults results; ResourceIndex idx = wayback.getResourceIndex(); *************** *** 173,181 **** } } ! // redirect to actual date if diff than request: if (!wmRequest.getExactDateRequest().equals( closest.getTimestamp().getDateStr())) { ! String newUrl = makeReplayURI(request,closest); response.sendRedirect(response.encodeRedirectURL(newUrl)); return; --- 205,213 ---- } } ! // redirect to actual date if diff than request: if (!wmRequest.getExactDateRequest().equals( closest.getTimestamp().getDateStr())) { ! String newUrl = makeReplayURI(request, closest); response.sendRedirect(response.encodeRedirectURL(newUrl)); return; *************** *** 254,265 **** } else { ! // String message = wmRequest.getRequestURI().getURI() + " on " ! // + wmRequest.getExactTimestamp().prettyDateTime() ! // + " is Not in the Archive"; String message = wmRequest.getRequestURI().getURI() + " is not in the Archive"; ! ! showError(message, request, response); } --- 286,296 ---- } else { ! 
// String message = wmRequest.getRequestURI().getURI() + " on " ! // + wmRequest.getExactTimestamp().prettyDateTime() ! // + " is Not in the Archive"; String message = wmRequest.getRequestURI().getURI() + " is not in the Archive"; ! showError(message, request, response); } *************** *** 282,285 **** --- 313,327 ---- } + /** + * Display page to user indicating that an exception happenned while + * processing their request + * + * @param wmRequest + * @param request + * @param response + * @param message + * @throws IOException + * @throws ServletException + */ public void showWaybackException(final WMRequest wmRequest, HttpServletRequest request, HttpServletResponse response, *************** *** 289,295 **** } public void showError(String message, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! // TODO Auto-generated method stub request.setAttribute("message", message); --- 331,347 ---- } + /** + * Show a generic error message to the user, dispatching to a JSP to + * actually render the template page. + * + * @param message + * @param request + * @param response + * @throws IOException + * @throws ServletException + */ public void showError(String message, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! request.setAttribute("message", message); *************** *** 312,316 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 364,367 ---- |
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback Modified Files: ResourceStore.java ResourceIndex.java ReplayUI.java QueryUI.java RequestParser.java Log Message: lots of javadoc comments Index: QueryUI.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/QueryUI.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** QueryUI.java 18 Oct 2005 02:30:49 -0000 1.1 --- QueryUI.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* QueryUI + * + * Created on Oct 18, 2005 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (archive-access.sourceforge.net). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback; *************** *** 12,22 **** --- 35,91 ---- import org.archive.wayback.core.WaybackLogic; + /** + * Uses ResourceStore, ResourceIndex via WaybackLogic, to transform a WMRequest + * into first a set of ResourceResults, then into an format suitable for + * end-users. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ public interface QueryUI { + /** + * Initialize this QueryUI. Pass in the specific configurations via + * Properties. + * + * @param p + * Generic properties bag for configurations + * @throws IOException + */ public void init(final Properties p) throws IOException; + /** + * Process a Wayback Machine Query request. + * + * @param wayback + * WaybackLogic object + * @param wmRequest + * pre-parsed WMRequest object + * @param request + * HttpServletRequest + * @param response + * HttpServletResponse + * @throws IOException + * @throws ServletException + */ public void handle(final WaybackLogic wayback, final WMRequest wmRequest, final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException; + /** + * Show results for a wayback request containing results for a single Url. + * + * @param wayback + * WaybackLogic object + * @param request + * HttpServletRequest + * @param response + * HttpServletResponse + * @param wmRequest + * pre-parsed WMRequest object + * @param results + * returns from ResourceIndex + * @throws IOException + * @throws ServletException + */ public void showQueryResults(WaybackLogic wayback, HttpServletRequest request, HttpServletResponse response, *************** *** 24,27 **** --- 93,112 ---- throws IOException, ServletException; + /** + * Show results for a wayback request containing results for multiple Urls. 
+ * + * @param wayback + * WaybackLogic object + * @param request + * HttpServletRequest + * @param response + * HttpServletResponse + * @param wmRequest + * pre-parsed WMRequest object + * @param results + * returns from ResourceIndex + * @throws IOException + * @throws ServletException + */ public void showPathQueryResults(WaybackLogic wayback, HttpServletRequest request, HttpServletResponse response, *************** *** 29,36 **** --- 114,145 ---- throws IOException, ServletException; + /** + * Show error page for no results for a wayback request. + * + * @param wmRequest + * pre-parsed WMRequest object + * @param request + * HttpServletRequest + * @param response + * HttpServletResponse + * @throws IOException + * @throws ServletException + */ public void showNoMatches(final WMRequest wmRequest, final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException; + /** + * Show error page for inability to communicate with ResourceIndex. + * + * @param wmRequest + * pre-parsed WMRequest object + * @param request + * HttpServletRequest + * @param response + * HttpServletResponse + * @throws IOException + * @throws ServletException + */ public void showIndexNotAvailable(final WMRequest wmRequest, final HttpServletRequest request, final HttpServletResponse response) Index: ResourceStore.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ResourceStore.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ResourceStore.java 18 Oct 2005 02:30:49 -0000 1.1 --- ResourceStore.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ResourceStore + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). 
+ * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback; *************** *** 7,13 **** --- 30,57 ---- import org.archive.wayback.core.Resource; + /** + * Transforms an ARCLocation into a Resource. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public interface ResourceStore { + /** + * Transform an ARCLocation into a Resource + * + * @param location + * @return Resource object retrieved from ARCLocation + * @throws IOException + */ public Resource retrieveResource(ARCLocation location) throws IOException; + /** + * Initialize this ResourceStore. Pass in the specific configurations via + * Properties. + * + * @param p + * Generic properties bag for configurations + * @throws Exception + */ public void init(Properties p) throws Exception; } Index: RequestParser.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/RequestParser.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** RequestParser.java 18 Oct 2005 02:30:49 -0000 1.1 --- RequestParser.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* RequestParser + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. 
+ * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback; *************** *** 5,9 **** --- 28,46 ---- import org.archive.wayback.core.WMRequest; + /** + * Parser of user requests from URL, query argument, Cookies, sessionID, etc + * into a WMRequest object. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public interface RequestParser { + /** + * Attempt to extract a valid WMRequest object from the HttpServletRequest + * + * @param request + * @return null or the parsed WMRequest object representing the users + * request. + */ public WMRequest parseRequest(final HttpServletRequest request); } Index: ResourceIndex.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ResourceIndex.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ResourceIndex.java 18 Oct 2005 02:30:49 -0000 1.1 --- ResourceIndex.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ResourceIndex + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). 
+ * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback; *************** *** 8,15 **** --- 31,62 ---- import org.archive.wayback.exception.WaybackException; + /** + * Transforms a WMRequest into a ResourceResults. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public interface ResourceIndex { + /** + * Transform a WMRequest into a ResourceResults. + * + * @param request + * @return ResourceResults containing ResourceResult objects matching the + * WMRequest + * + * @throws IOException + * @throws WaybackException + */ public ResourceResults query(final WMRequest request) throws IOException, WaybackException; + /** + * Initialize this ResourceIndex. Pass in the specific configurations via + * Properties. 
+ * + * @param p + * Generic properties bag for configurations + * @throws Exception + */ public void init(Properties p) throws Exception; } Index: ReplayUI.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ReplayUI.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ReplayUI.java 18 Oct 2005 02:30:49 -0000 1.1 --- ReplayUI.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ReplayUI + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback; *************** *** 14,28 **** --- 37,96 ---- import org.archive.wayback.core.WaybackLogic; + /** + * Uses ResourceIndex, ResourceStore via WaybackLogic to transform a WMRequest + * into a user-viewable response, either the resource requested, a redirect to a + * better request, or an error message as to why the request failed. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public interface ReplayUI { + /** + * Initialize this ReplayUI. Pass in the specific configurations via + * Properties. 
+ * + * @param p + * Generic properties bag for configurations + * @throws IOException + */ public void init(final Properties p) throws IOException; + /** + * @param request + * @param result + * @return user-viewable String URL that will replay the ResourceResult + */ public String makeReplayURI(final HttpServletRequest request, final ResourceResult result); + /** + * Process a Wayback Replay request, returning the resource requested, a + * redirect to a better/correct URL for the resource, or an error message. + * + * @param wayback + * @param wmRequest + * @param request + * @param response + * @throws IOException + * @throws ServletException + */ public void handle(final WaybackLogic wayback, final WMRequest wmRequest, final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException; + /** + * Return a Resource to the user, performing whatever markup or alteration + * is required. + * + * @param wmRequest + * @param result + * @param resource + * @param request + * @param response + * @param results + * @throws IOException + * @throws ServletException + */ public void replayResource(final WMRequest wmRequest, final ResourceResult result, final Resource resource, *************** *** 31,38 **** --- 99,127 ---- throws IOException, ServletException; + /** + * Return an error page to the User indicating that the Resource they + * requested is not stored in the archive. + * + * @param wmRequest + * @param request + * @param response + * @throws IOException + * @throws ServletException + */ public void showNotInArchive(final WMRequest wmRequest, final HttpServletRequest request, final HttpServletResponse response) throws IOException, ServletException; + /** + * Return an error page to the User indicating that the Resource they + * requested is stored in the archive, but is not presently available. 
+ * + * @param wmRequest + * @param request + * @param response + * @param message + * @throws IOException + * @throws ServletException + */ public void showResourceNotAvailable(final WMRequest wmRequest, final HttpServletRequest request, *************** *** 40,43 **** --- 129,143 ---- throws IOException, ServletException; + /** + * Return an error page to the User indicating that the ResourceIndex is + * presently not available. + * + * @param wmRequest + * @param request + * @param response + * @param message + * @throws IOException + * @throws ServletException + */ public void showIndexNotAvailable(final WMRequest wmRequest, final HttpServletRequest request, |
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/core Modified Files: ResourceResults.java WaybackLogic.java Timestamp.java WMRequest.java Resource.java ResourceResult.java Log Message: lots of javadoc comments Index: ResourceResult.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/ResourceResult.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ResourceResult.java 18 Oct 2005 02:30:48 -0000 1.1 --- ResourceResult.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ResourceResult + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.core; *************** *** 5,8 **** --- 28,38 ---- import org.archive.io.arc.ARCLocation; + /** + * Encapsulates the data for a single Resource (in an ARC file) returned from a + * ResourceIndex query. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class ResourceResult { private final static String CDX_HEADER_STRING = " CDX N b h m s k r V g"; *************** *** 26,33 **** --- 56,71 ---- private String arcFileName = null; + /** + * Constructor + */ public ResourceResult() { super(); } + /** + * get the ARCLocation object corresponding to this ResourceResult. + * + * @return ARCLocation object. + */ public ARCLocation getARCLocation() { final String daArcName = arcFileName; *************** *** 48,51 **** --- 86,98 ---- } + /** + * Attempt to deserialize state from a single text line, fields delimited by + * spaces. There are standard ways to do this, and this is not one of + * them... for no good reason. + * + * @param line + * @param lineNumber + * @throws ParseException + */ public void parseLine(final String line, final int lineNumber) throws ParseException { *************** *** 65,68 **** --- 112,121 ---- } + /** + * get the CDX header line for the format serialized and deserialized in + * flat file format. + * + * @return String representation of the CDX header line, WITHOUT NEWLINE. + */ public static String getCDXHeaderString() { return CDX_HEADER_STRING; *************** *** 75,119 **** } ! public String toShortString() { ! return url + "\t" + timestamp.getDateStr() + "\t" + compressedOffset ! + "\t" + arcFileName; ! } ! public String getArcFileName() { return arcFileName; } public long getCompressedOffset() { return compressedOffset; } public String getHttpResponseCode() { return httpResponseCode; } public String getMd5Fragment() { return md5Fragment; } public String getMimeType() { return mimeType; } public String getOrigHost() { return origHost; } public String getRedirectUrl() { return redirectUrl; } public boolean isRedirect() { return (0 != redirectUrl.compareTo("-")); } public Timestamp getTimestamp() { return timestamp; } public String getUrl() { return url; --- 128,209 ---- } ! /** ! * @return arcFileName property ! 
*/ public String getArcFileName() { return arcFileName; } + /** + * @return compressedOffset property + */ public long getCompressedOffset() { return compressedOffset; } + /** + * @return String representation of the HTTP response code property. + */ public String getHttpResponseCode() { return httpResponseCode; } + /** + * @return MD5 digest property in hex-dec format, possible truncated to less + * than 32 characters. + */ public String getMd5Fragment() { return md5Fragment; } + /** + * @return mimeType property + */ public String getMimeType() { return mimeType; } + /** + * @return the original fully qualified String hostname from which this + * resource was acquired. + */ public String getOrigHost() { return origHost; } + /** + * @return the String URL to which this resource redirects, or "-" if it + * does not redirect. + */ public String getRedirectUrl() { return redirectUrl; } + /** + * @return true if this resource is though to redirect to another URL, false + * otherwise. + */ public boolean isRedirect() { return (0 != redirectUrl.compareTo("-")); } + /** + * @return Returns the timestamp. + */ public Timestamp getTimestamp() { return timestamp; } + /** + * @param timestamp + * The timestamp to set. + */ + public void setTimestamp(Timestamp timestamp) { + this.timestamp = timestamp; + } + + /** + * @return Returns the url. + */ public String getUrl() { return url; *************** *** 121,165 **** /** ! * @param args */ ! public static void main(String[] args) { ! // TODO Auto-generated method stub ! 
} public void setArcFileName(String arcFileName) { this.arcFileName = arcFileName; } public void setCompressedOffset(long compressedOffset) { this.compressedOffset = compressedOffset; } public void setHttpResponseCode(String httpResponseCode) { this.httpResponseCode = httpResponseCode; } public void setMd5Fragment(String md5Fragment) { this.md5Fragment = md5Fragment; } public void setMimeType(String mimeType) { this.mimeType = mimeType; } public void setOrigHost(String origHost) { this.origHost = origHost; } public void setRedirectUrl(String redirectUrl) { this.redirectUrl = redirectUrl; } ! public void setTimeStamp(Timestamp timeStamp) { ! this.timestamp = timeStamp; ! } - public void setUrl(String url) { - this.url = url; } --- 211,282 ---- /** ! * @param url ! * The url to set. */ ! public void setUrl(String url) { ! this.url = url; } + /** + * @param arcFileName + * The arcFileName to set. + */ public void setArcFileName(String arcFileName) { this.arcFileName = arcFileName; } + /** + * @param compressedOffset + * The compressedOffset to set. + */ public void setCompressedOffset(long compressedOffset) { this.compressedOffset = compressedOffset; } + /** + * @param httpResponseCode + * The httpResponseCode to set. + */ public void setHttpResponseCode(String httpResponseCode) { this.httpResponseCode = httpResponseCode; } + /** + * @param md5Fragment + * The md5Fragment to set. + */ public void setMd5Fragment(String md5Fragment) { this.md5Fragment = md5Fragment; } + /** + * @param mimeType + * The mimeType to set. + */ public void setMimeType(String mimeType) { this.mimeType = mimeType; } + /** + * @param origHost + * The origHost to set. + */ public void setOrigHost(String origHost) { this.origHost = origHost; } + /** + * @param redirectUrl + * The redirectUrl to set. + */ public void setRedirectUrl(String redirectUrl) { this.redirectUrl = redirectUrl; } ! /** ! * @param args ! */ ! 
public static void main(String[] args) { } Index: WMRequest.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/WMRequest.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** WMRequest.java 18 Oct 2005 02:30:48 -0000 1.1 --- WMRequest.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* WMRequest + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.core; *************** *** 6,9 **** --- 29,39 ---- import org.archive.net.UURI; + /** + * Abstraction of all the data associated with a users request to the Wayback + * Machine. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class WMRequest { private final Pattern IMAGE_REGEX = Pattern *************** *** 30,127 **** private boolean pathQuery = false; ! public UURI getRedirectURI() { ! return redirectURI; } ! public void setRedirectURI(UURI redirectURI) { ! this.redirectURI = redirectURI; } ! public WMRequest() { ! super(); ! // TODO Auto-generated constructor stub } ! 
public boolean isRetrieval() { ! return this.retrieval; } ! public boolean isQuery() { ! return this.query; } ! public boolean isPathQuery() { ! return this.pathQuery; } public Timestamp getExactTimestamp() { return exactTimestamp; } public void setExactTimestamp(Timestamp exactTimestamp) { this.exactTimestamp = exactTimestamp; } ! public Timestamp getEndTimestamp() { ! return endTimestamp; } ! public void setEndTimestamp(Timestamp endTimestamp) { ! this.endTimestamp = endTimestamp; } public String getReferrerUrl() { return referrerUrl; } public void setReferrerUrl(String referrerUrl) { this.referrerUrl = referrerUrl; } public UURI getRequestURI() { return requestURI; } public void setRequestURI(UURI requestURI) { this.requestURI = requestURI; } public Timestamp getStartTimestamp() { return startTimestamp; } public void setStartTimestamp(Timestamp startTimestamp) { this.startTimestamp = startTimestamp; } ! private void resetType() { ! this.retrieval = false; ! this.query = false; ! this.pathQuery = false; ! } ! ! public void setPathQuery() { ! resetType(); ! this.pathQuery = true; ! } ! ! public void setQuery() { ! resetType(); ! this.query = true; } ! public void setRetrieval() { ! resetType(); ! this.retrieval = true; } ! public boolean isImageRetrieval() { ! String uri = requestURI.getEscapedURI(); ! Matcher matcher = null; ! matcher = IMAGE_REGEX.matcher(uri); ! if (matcher != null && matcher.matches()) { ! return true; ! } ! return false; } --- 60,238 ---- private boolean pathQuery = false; ! /** ! * Constructor ! */ ! public WMRequest() { ! super(); } ! private void resetType() { ! this.retrieval = false; ! this.query = false; ! this.pathQuery = false; } ! /** ! * sets the request type to PathQuery ! */ ! public void setPathQuery() { ! resetType(); ! this.pathQuery = true; } ! /** ! * sets the request type to Query ! */ ! public void setQuery() { ! resetType(); ! this.query = true; } ! /** ! * sets the request type to Retrieval/Replay ! */ ! 
public void setRetrieval() { ! resetType(); ! this.retrieval = true; } ! /** ! * @return whether this request appears to be for an image, using only the ! * requested URL ! */ ! public boolean isImageRetrieval() { ! String uri = requestURI.getEscapedURI(); ! Matcher matcher = null; ! matcher = IMAGE_REGEX.matcher(uri); ! if (matcher != null && matcher.matches()) { ! return true; ! } ! return false; ! } ! ! /** ! * @return Returns the endTimestamp. ! */ ! public Timestamp getEndTimestamp() { ! return endTimestamp; ! } ! ! /** ! * @param endTimestamp ! * The endTimestamp to set. ! */ ! public void setEndTimestamp(Timestamp endTimestamp) { ! this.endTimestamp = endTimestamp; ! } ! ! /** ! * @return Returns the exactDateRequest. ! */ ! public String getExactDateRequest() { ! return exactDateRequest; } + /** + * @param exactDateRequest + * The exactDateRequest to set. + */ + public void setExactDateRequest(String exactDateRequest) { + this.exactDateRequest = exactDateRequest; + } + + /** + * @return Returns the exactTimestamp. + */ public Timestamp getExactTimestamp() { return exactTimestamp; } + /** + * @param exactTimestamp + * The exactTimestamp to set. + */ public void setExactTimestamp(Timestamp exactTimestamp) { this.exactTimestamp = exactTimestamp; } ! /** ! * @return Returns the redirectURI. ! */ ! public UURI getRedirectURI() { ! return redirectURI; } ! /** ! * @param redirectURI ! * The redirectURI to set. ! */ ! public void setRedirectURI(UURI redirectURI) { ! this.redirectURI = redirectURI; } + /** + * @return Returns the referrerUrl. + */ public String getReferrerUrl() { return referrerUrl; } + /** + * @param referrerUrl + * The referrerUrl to set. + */ public void setReferrerUrl(String referrerUrl) { this.referrerUrl = referrerUrl; } + /** + * @return Returns the requestURI. + */ public UURI getRequestURI() { return requestURI; } + /** + * @param requestURI + * The requestURI to set. 
+ */ public void setRequestURI(UURI requestURI) { this.requestURI = requestURI; } + /** + * @return Returns the startTimestamp. + */ public Timestamp getStartTimestamp() { return startTimestamp; } + /** + * @param startTimestamp + * The startTimestamp to set. + */ public void setStartTimestamp(Timestamp startTimestamp) { this.startTimestamp = startTimestamp; } ! /** ! * @return Returns the pathQuery. ! */ ! public boolean isPathQuery() { ! return pathQuery; } ! /** ! * @return Returns the query. ! */ ! public boolean isQuery() { ! return query; } ! /** ! * @return Returns the retrieval. ! */ ! public boolean isRetrieval() { ! return retrieval; } *************** *** 130,143 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub - - } - public String getExactDateRequest() { - return exactDateRequest; - } - - public void setExactDateRequest(String exactDateRequest) { - this.exactDateRequest = exactDateRequest; } --- 241,245 ---- Index: Resource.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/Resource.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Resource.java 18 Oct 2005 02:30:48 -0000 1.1 --- Resource.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,19 **** ! /** ! * */ - package org.archive.wayback.core; ! import java.io.IOException; import org.archive.io.arc.ARCRecord; /** ! * @author brad ! * */ public class Resource { ! ARCRecord arcRecord = null; // probably this should inherit from ARCRecord... public Resource(final ARCRecord rec) { super(); --- 1,48 ---- ! /* Resource ! * ! * Created on 2005/10/18 14:00:00 ! * ! * Copyright (C) 2005 Internet Archive. ! * ! * This file is part of the Wayback Machine (crawler.archive.org). ! * ! * Wayback Machine is free software; you can redistribute it and/or modify ! 
* it under the terms of the GNU Lesser Public License as published by ! * the Free Software Foundation; either version 2.1 of the License, or ! * any later version. ! * ! * Wayback Machine is distributed in the hope that it will be useful, ! * but WITHOUT ANY WARRANTY; without even the implied warranty of ! * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! * GNU Lesser Public License for more details. ! * ! * You should have received a copy of the GNU Lesser Public License ! * along with Wayback Machine; if not, write to the Free Software ! * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ ! package org.archive.wayback.core; import org.archive.io.arc.ARCRecord; /** ! * Slightly more than an ARCRecord. This class is designed to be an abstraction ! * to allow the Wayback to operate with non-ARC file format resources. Probably ! * the interface required will end up looking very much like ARCRecord, but can ! * be reimplemented to handle new ARC formats or non-ARC formats. At the moment, ! * users of this class just grab the ARCRecord out and use it directly. ! * ! * @author Brad Tofel ! * @version $Date$, $Revision$ */ public class Resource { ! ARCRecord arcRecord = null; + /** + * Constructor + * + * @param rec + */ public Resource(final ARCRecord rec) { super(); *************** *** 21,40 **** } ! public String dumpRaw() throws IOException { ! arcRecord.skipHttpHeader(); ! String content = ""; ! ! byte[] outputBuffer = new byte[8 * 1024]; ! int read = outputBuffer.length; ! while ((read = arcRecord.read(outputBuffer, 0, outputBuffer.length)) != -1) { ! String tmpString = new String(outputBuffer, 0, read); ! content = content.concat(tmpString); ! //System.out.write(outputBuffer, 0, read); ! } ! //System.out.flush(); ! ! return content; ! } ! public ARCRecord getArcRecord() { return arcRecord; --- 50,56 ---- } ! /** ! * @return the ARCRecord underlying this Resource. ! 
*/ public ARCRecord getArcRecord() { return arcRecord; *************** *** 45,49 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 61,64 ---- Index: Timestamp.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/Timestamp.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** Timestamp.java 18 Oct 2005 02:30:48 -0000 1.1 --- Timestamp.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* Timestamp + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.core; *************** *** 6,9 **** --- 29,38 ---- import org.archive.util.ArchiveUtils; + /** + * Represents a moment in time as a 14-digit string, and internally as a Date. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class Timestamp { private final static String FIRST1_TIMESTAMP = "19960101000000"; *************** *** 13,17 **** private final static String LAST1_TIMESTAMP = "19991231235959"; ! 
// private final static String LAST2_TIMESTAMP = "20311231235959"; private final static String LAST2_TIMESTAMP = "29991231235959"; --- 42,46 ---- private final static String LAST1_TIMESTAMP = "19991231235959"; ! // private final static String LAST2_TIMESTAMP = "20311231235959"; private final static String LAST2_TIMESTAMP = "29991231235959"; *************** *** 23,31 **** private Date date = null; public Timestamp() { super(); - // TODO Auto-generated constructor stub } public static Timestamp parseBefore(final String dateStr) throws ParseException { --- 52,68 ---- private Date date = null; + /** + * Constructor + */ public Timestamp() { super(); } + /** + * @param dateStr + * @return Timestamp object representing the earliest date represented by + * the (possibly) partial digit-string argument. + * @throws ParseException + */ public static Timestamp parseBefore(final String dateStr) throws ParseException { *************** *** 35,38 **** --- 72,81 ---- } + /** + * @param dateStr + * @return Timestamp object representing the latest date represented by the + * (possibly) partial digit-string argument. + * @throws ParseException + */ public static Timestamp parseAfter(final String dateStr) throws ParseException { *************** *** 42,45 **** --- 85,92 ---- } + /** + * @return Timestamp object representing the earliest possible date. + * @throws ParseException + */ public static Timestamp earliestTimestamp() throws ParseException { Timestamp ts = new Timestamp(); *************** *** 48,51 **** --- 95,102 ---- } + /** + * @return Timestamp object representing the latest possible date. + * @throws ParseException + */ public static Timestamp latestTimestamp() throws ParseException { Timestamp ts = new Timestamp(); *************** *** 54,57 **** --- 105,113 ---- } + /** + * @param sse + * @return Timestamp object representing the seconds since epoch argument. 
+ * @throws ParseException + */ public static Timestamp fromSse(final int sse) throws ParseException { String dateStr = ArchiveUtils.get14DigitDate(sse * 1000); *************** *** 94,101 **** --- 150,167 ---- } + /** + * @return the 14-digit String representation of this Timestamp. + */ public String getDateStr() { return dateStr; } + /** + * initialize internal data structures for this Timestamp from the 14-digit + * argument. + * + * @param dateStr + * @throws ParseException + */ public void setDateStr(String dateStr) throws ParseException { date = ArchiveUtils.parse14DigitDate(dateStr); *************** *** 103,106 **** --- 169,176 ---- } + /** + * @return the integer number of seconds since epoch represented by this + * Timestamp. + */ public int sse() { return Math.round(date.getTime() / 1000); *************** *** 108,120 **** /** ! * function that calculates integer milliseconds between this records ! * timeStamp and the arguments timeStamp. result is the absolute ! * number of milliseconds difference. * ! * @param String 14 digit UTC representation of another timestamp. ! * @return int seconds between the argument and this records timestamp. ! * @throws ParseException if the inputstring was malformed */ - public long absDistanceFromTimestamp(final Timestamp otherTimeStamp) throws ParseException { --- 178,190 ---- /** ! * function that calculates integer milliseconds between this records ! * timeStamp and the arguments timeStamp. result is the absolute number of ! * milliseconds difference. * ! * @param otherTimeStamp ! * @return int absolute milliseconds between the argument and this records ! * timestamp. ! * @throws ParseException */ public long absDistanceFromTimestamp(final Timestamp otherTimeStamp) throws ParseException { *************** *** 123,136 **** /** ! * function that calculates integer milliseconds between this records ! * timeStamp and the arguments timeStamp. result is negative if ! 
* this records timeStamp is less than the argument, positive ! * if it is greater, and 0 if the same. ! * ! * @param String 14 digit UTC representation of another timestamp. ! * @return int seconds between the argument and this records timestamp. ! * @throws ParseException if the inputstring was malformed */ - public long distanceFromTimestamp(final Timestamp otherTimeStamp) throws ParseException { --- 193,205 ---- /** ! * function that calculates integer milliseconds between this records ! * timeStamp and the arguments timeStamp. result is negative if this records ! * timeStamp is less than the argument, positive if it is greater, and 0 if ! * the same. ! * ! * @param otherTimeStamp ! * @return long milliseconds ! * @throws ParseException */ public long distanceFromTimestamp(final Timestamp otherTimeStamp) throws ParseException { *************** *** 141,156 **** --- 210,238 ---- } + /** + * @return the year portion(first 4 digits) of this Timestamp + */ public String getYear() { return this.dateStr.substring(0, 4); } + /** + * @return the month portion(digits 5-6) of this Timestamp + */ public String getMonth() { return this.dateStr.substring(4, 6); } + /** + * @return the day portion(digits 7-8) of this Timestamp + */ public String getDay() { return this.dateStr.substring(6, 8); } + /** + * @return user friendly String representation of the Date of this + * Timestamp. + */ public String prettyDate() { String year = dateStr.substring(0, 4); *************** *** 165,168 **** --- 247,254 ---- } + /** + * @return user friendly String representation of the Time of this + * Timestamp. + */ public String prettyTime() { return dateStr.substring(8, 10) + ":" + dateStr.substring(10, 12) + ":" *************** *** 170,185 **** --- 256,296 ---- } + /** + * @return user friendly String representation of the Date and Time of this + * Timestamp. 
+ */ public String prettyDateTime() { return prettyDate() + " " + prettyTime(); } + /** + * Presently unused, but possibly helpful in complex QueryUI generation. + * + * @return Timestamp representing the start of the Year this Timestamp + * occurred in. + * @throws ParseException + */ public Timestamp startOfYear() throws ParseException { return parseBefore(dateStr.substring(0, 4)); } + /** + * Presently unused, but possibly helpful in complex QueryUI generation. + * + * @return Timestamp representing the start of the Month this Timestamp + * occurred in. + * @throws ParseException + */ public Timestamp startOfMonth() throws ParseException { return parseBefore(dateStr.substring(0, 6)); } + /** + * Presently unused, but possibly helpful in complex QueryUI generation. + * + * @return Timestamp representing the start of the Week this Timestamp + * occurred in. + * @throws ParseException + */ public Timestamp startOfWeek() throws ParseException { String yearMonth = dateStr.substring(0, 6); *************** *** 192,199 **** --- 303,324 ---- } + /** + * Presently unused, but possibly helpful in complex QueryUI generation. + * + * @return Timestamp representing the start of the Day this Timestamp + * occurred in. + * @throws ParseException + */ public Timestamp startOfDay() throws ParseException { return parseBefore(dateStr.substring(0, 8)); } + /** + * Presently unused, but possibly helpful in complex QueryUI generation. + * + * @return Timestamp representing the start of the Hour this Timestamp + * occurred in. + * @throws ParseException + */ public Timestamp startOfHour() throws ParseException { return parseBefore(dateStr.substring(0, 10)); *************** *** 204,228 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } } ! //public Date getDate() { ! //String[] ids = TimeZone.getAvailableIDs(0); ! //if(ids.length < 1) { ! // return null; ! //} ! //TimeZone gmt = new SimpleTimeZone(0,ids[0]); ! 
//Calendar cal = new GregorianCalendar(gmt); ! //int year = Integer.parseInt(dateStr.substring(0,4)); ! //int month = Integer.parseInt(dateStr.substring(4,2)) - 1; ! //int day = Integer.parseInt(dateStr.substring(6,2)); ! //int hour = Integer.parseInt(dateStr.substring(8,2)); ! //int min = Integer.parseInt(dateStr.substring(10,2)); ! //int sec = Integer.parseInt(dateStr.substring(12,2)); // ! //cal.set(year,month,day,hour,min,sec); ! //return cal.getTime(); ! //} // \ No newline at end of file --- 329,352 ---- */ public static void main(String[] args) { } } ! // public Date getDate() { ! // String[] ids = TimeZone.getAvailableIDs(0); ! // if(ids.length < 1) { ! // return null; ! // } ! // TimeZone gmt = new SimpleTimeZone(0,ids[0]); ! // Calendar cal = new GregorianCalendar(gmt); ! // int year = Integer.parseInt(dateStr.substring(0,4)); ! // int month = Integer.parseInt(dateStr.substring(4,2)) - 1; ! // int day = Integer.parseInt(dateStr.substring(6,2)); ! // int hour = Integer.parseInt(dateStr.substring(8,2)); ! // int min = Integer.parseInt(dateStr.substring(10,2)); ! // int sec = Integer.parseInt(dateStr.substring(12,2)); // ! // cal.set(year,month,day,hour,min,sec); ! // return cal.getTime(); ! // } // \ No newline at end of file Index: WaybackLogic.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/WaybackLogic.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** WaybackLogic.java 18 Oct 2005 02:30:48 -0000 1.1 --- WaybackLogic.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* WaybackLogic + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). 
+ * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.core; *************** *** 9,12 **** --- 32,41 ---- import org.archive.wayback.ResourceStore; + /** + * Constructor and go-between for the major components in the Wayback Machine. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class WaybackLogic { private static final Logger LOGGER = Logger.getLogger(WaybackLogic.class *************** *** 29,37 **** private ResourceStore resourceStore = null; public WaybackLogic() { super(); - // TODO Auto-generated constructor stub } public void init(Properties p) throws Exception { LOGGER.info("WaybackLogic constructing classes..."); --- 58,77 ---- private ResourceStore resourceStore = null; + /** + * Constructor + */ public WaybackLogic() { super(); } + /** + * Initialize this WaybackLogic. Pass in the specific configurations via + * Properties. Will construct and initialize implementations of + * ResourceIndex, ResourceResults, QueryUI, and ReplayUI. + * + * @param p + * Generic properties bag for configurations + * @throws Exception + */ public void init(Properties p) throws Exception { LOGGER.info("WaybackLogic constructing classes..."); *************** *** 84,108 **** /** ! 
* @param args */ - public static void main(String[] args) { - // TODO Auto-generated method stub - - } - public QueryUI getQueryUI() { return queryUI; } public ReplayUI getReplayUI() { return replayUI; } public ResourceIndex getResourceIndex() { return resourceIndex; } public ResourceStore getResourceStore() { return resourceStore; } } --- 124,159 ---- /** ! * @return Returns the queryUI. */ public QueryUI getQueryUI() { return queryUI; } + /** + * @return Returns the replayUI. + */ public ReplayUI getReplayUI() { return replayUI; } + /** + * @return Returns the resourceIndex. + */ public ResourceIndex getResourceIndex() { return resourceIndex; } + /** + * @return Returns the resourceStore. + */ public ResourceStore getResourceStore() { return resourceStore; } + + /** + * @param args + */ + public static void main(String[] args) { + + } } Index: ResourceResults.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core/ResourceResults.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ResourceResults.java 18 Oct 2005 02:30:48 -0000 1.1 --- ResourceResults.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ResourceResults + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.core; *************** *** 8,14 **** --- 31,46 ---- import org.archive.wayback.core.WMRequest; + /** + * Slightly more than an ArrayList of ResourceResult objects.. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class ResourceResults { ArrayList results = null; + /** + * Constructor + */ public ResourceResults() { super(); *************** *** 16,31 **** --- 48,78 ---- } + /** + * @return true if no ResourceResult objects, false otherwise. + */ public boolean isEmpty() { return results.isEmpty(); } + /** + * @param result + * ResourceResult to add to this set + */ public void addResourceResult(final ResourceResult result) { results.add(result); } + /** + * @return number of ResourceResult objects contained in this set + */ public int getNumResults() { return results.size(); } + /** + * @param wmRequest + * @return the temporally closest ResourceResult object contained in this + * set to the exactTimestamp of the WMRequest argument. + */ public ResourceResult getClosest(final WMRequest wmRequest) { ResourceResult closest = null; *************** *** 52,59 **** --- 99,115 ---- } + /** + * @return an Iterator that contains the ResourceResult objects + */ public Iterator iterator() { return results.iterator(); } + /** + * unused presently, possibly useful in advanced QueryUI column + * generation... + * + * @return Arraylist of String years included in this set. + */ public ArrayList getYears() { ArrayList years = new ArrayList(); *************** *** 71,74 **** --- 127,137 ---- } + /** + * unused presently, possibly useful in advanced QueryUI column + * generation... + * + * @param year + * @return ArrayList of ResourceResult objects within the year argument. 
+ */ public ArrayList resultsInYear(String year) { ArrayList resultsToReturn = new ArrayList(); *************** *** 83,86 **** --- 146,153 ---- } + /** + * @return the earliest Timestamp among all ResourceResult objects in this + * set. + */ public Timestamp firstTimestamp() { if (results.isEmpty()) { *************** *** 91,94 **** --- 158,165 ---- } + /** + * @return the latest Timestamp among all ResourceResult objects in this + * set. + */ public Timestamp lastTimestamp() { if (results.isEmpty()) { *************** *** 103,107 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 174,177 ---- |
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localbdbresourceindex In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/localbdbresourceindex Modified Files: BDBResourceIndexWriter.java LocalBDBResourceIndex.java BDBResourceIndex.java Log Message: lots of javadoc comments Index: BDBResourceIndex.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localbdbresourceindex/BDBResourceIndex.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** BDBResourceIndex.java 18 Oct 2005 02:30:50 -0000 1.1 --- BDBResourceIndex.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* BDBResourceIndex + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.localbdbresourceindex; *************** *** 8,21 **** import org.archive.wayback.core.ResourceResults; ! //import com.sleepycat.bind.tuple.TupleBinding; ! //import com.sleepycat.bind.tuple.TupleInput; ! 
//import com.sleepycat.bind.tuple.TupleOutput; import com.sleepycat.je.Cursor; ! //import com.sleepycat.je.CursorConfig; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; import com.sleepycat.je.DatabaseEntry; import com.sleepycat.je.DatabaseException; ! //import com.sleepycat.je.DatabaseNotFoundException; import com.sleepycat.je.Environment; import com.sleepycat.je.EnvironmentConfig; --- 31,44 ---- import org.archive.wayback.core.ResourceResults; ! // import com.sleepycat.bind.tuple.TupleBinding; ! // import com.sleepycat.bind.tuple.TupleInput; ! // import com.sleepycat.bind.tuple.TupleOutput; import com.sleepycat.je.Cursor; ! // import com.sleepycat.je.CursorConfig; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; import com.sleepycat.je.DatabaseEntry; import com.sleepycat.je.DatabaseException; ! // import com.sleepycat.je.DatabaseNotFoundException; import com.sleepycat.je.Environment; import com.sleepycat.je.EnvironmentConfig; *************** *** 23,26 **** --- 46,55 ---- import com.sleepycat.je.OperationStatus; + /** + * ResourceResults-specific wrapper on top of the BDBJE database. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class BDBResourceIndex { private String path; *************** *** 32,37 **** Database db = null; ! // Cursor cursor = null; public BDBResourceIndex(final String thePath, final String theDbName) throws Exception { --- 61,75 ---- Database db = null; ! // Cursor cursor = null; + /** + * Constructor + * + * @param thePath + * directory where BDBJE files are stored + * @param theDbName + * name of BDB database + * @throws Exception + */ public BDBResourceIndex(final String thePath, final String theDbName) throws Exception { *************** *** 84,88 **** LockMode.DEFAULT); while (status == OperationStatus.SUCCESS) { ! 
// String keyString = new String(key.getData()); String valueString = new String(value.getData()); --- 122,126 ---- LockMode.DEFAULT); while (status == OperationStatus.SUCCESS) { ! // String keyString = new String(key.getData()); String valueString = new String(value.getData()); *************** *** 108,112 **** dbe.printStackTrace(); } catch (ParseException e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 146,149 ---- *************** *** 152,156 **** dbe.printStackTrace(); } catch (ParseException e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 189,192 ---- *************** *** 179,183 **** cursor.close(); } catch (DatabaseException e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 215,218 ---- *************** *** 188,192 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 223,226 ---- Index: BDBResourceIndexWriter.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localbdbresourceindex/BDBResourceIndexWriter.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** BDBResourceIndexWriter.java 18 Oct 2005 02:30:50 -0000 1.1 --- BDBResourceIndexWriter.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* BDBResourceIndexWriter + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.localbdbresourceindex; *************** *** 9,18 **** import com.sleepycat.je.DatabaseException; public class BDBResourceIndexWriter { private BDBResourceIndex db = null; public BDBResourceIndexWriter() { super(); - // TODO Auto-generated constructor stub } --- 32,49 ---- import com.sleepycat.je.DatabaseException; + /** + * Implements updates to a BDBResourceIndex + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class BDBResourceIndexWriter { private BDBResourceIndex db = null; + /** + * Constructor + */ public BDBResourceIndexWriter() { super(); } *************** *** 30,33 **** --- 61,72 ---- } + /** + * reads all ResourceResult objects from CDX at filePath, and merges them + * into the BDBResourceIndex. 
+ * + * @param filePath + * to CDX file + * @throws Exception + */ public void importFile(String filePath) throws Exception { ResourceResults results = readFile(filePath); *************** *** 68,72 **** idx.shutdown(); } catch (Exception e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 107,110 ---- Index: LocalBDBResourceIndex.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localbdbresourceindex/LocalBDBResourceIndex.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** LocalBDBResourceIndex.java 18 Oct 2005 02:30:50 -0000 1.1 --- LocalBDBResourceIndex.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* LocalBDBResourceIndex + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.localbdbresourceindex; *************** *** 12,15 **** --- 35,44 ---- import org.archive.wayback.exception.WaybackException; + /** + * Implements ResourceIndex interface using a BDBResourceIndex + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class LocalBDBResourceIndex implements ResourceIndex { private static Thread indexUpdateThread = null; *************** *** 29,32 **** --- 58,64 ---- private BDBResourceIndex db = null; + /** + * Constructor + */ public LocalBDBResourceIndex() { super(); *************** *** 56,60 **** if (runPipeline != null) { - // QUESTION: are we sure there will be a single instace System.out .println("LocalDBDResourceIndex starting pipeline thread..."); --- 88,91 ---- *************** *** 70,74 **** public ResourceResults query(WMRequest request) throws IOException, WaybackException { - // TODO add check of WMRequest and call different methods: String searchHost = request.getRequestURI().getHostBasename(); String searchPath = request.getRequestURI().getEscapedPathQuery(); --- 101,104 ---- *************** *** 102,105 **** --- 132,142 ---- } + /** + * Thread that repeatedly runs processing of an IndexPipeline and merges new + * data into a BDBResourceIndex + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ private class IndexUpdateThread extends Thread { private final static int SLEEP_MILLISECONDS = 10000; *************** *** 109,112 **** --- 146,157 ---- IndexPipeline pipeline = null; + /** + * Constructor + * + * @param bdb + * initialized BDBResourceIndex + * @param pipeline + * initialized IndexPipeline + */ public IndexUpdateThread(final BDBResourceIndex bdb, IndexPipeline pipeline) { *************** *** 126,130 **** sleep(SLEEP_MILLISECONDS); } catch 
(InterruptedException e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 171,174 ---- *************** *** 138,142 **** // System.out.println("Indexed..."); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 182,185 ---- *************** *** 145,152 **** private void mergeIndex() { int numMerged = 0; ! String newFiles[] = pipeline.mergeDir.list(); for (int i = 0; i < newFiles.length; i++) { // TODO: Special handling of encoding and date. ! File newFile = new File(pipeline.mergeDir.getAbsolutePath() + "/" + newFiles[i]); --- 188,196 ---- private void mergeIndex() { int numMerged = 0; ! String newFiles[] = pipeline.getMergeDir().list(); for (int i = 0; i < newFiles.length; i++) { // TODO: Special handling of encoding and date. ! File newFile = new File(pipeline.getMergeDir() ! .getAbsolutePath() + "/" + newFiles[i]); *************** *** 160,164 **** numMerged++; } catch (Exception e) { - // TODO Auto-generated catch block e.printStackTrace(); } --- 204,207 ---- *************** *** 170,180 **** } } - - /** - * @param args - */ - public static void main(String[] args) { - // TODO Auto-generated method stub - - } } --- 213,215 ---- |
From: Brad <bra...@us...> - 2005-10-19 01:22:46
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/simplequeryui In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/simplequeryui Modified Files: SimpleQueryUI.java UIResults.java Log Message: lots of javadoc comments Index: UIResults.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/simplequeryui/UIResults.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** UIResults.java 18 Oct 2005 02:30:49 -0000 1.1 --- UIResults.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,24 ---- + /* UIResults + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ package org.archive.wayback.simplequeryui; *************** *** 11,14 **** --- 33,42 ---- import org.archive.wayback.core.WMRequest; + /** + * Provides easy access to data required in dispatched QueryUI JSPs. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class UIResults { *************** *** 31,37 **** private HttpServletRequest httpServletRequest; ! 
// private String nextPageUrl; ! // private String serverBaseUrl; ! public UIResults(WMRequest wmRequest, ResourceResults results, HttpServletRequest request, ReplayUI replayUI) { --- 59,68 ---- private HttpServletRequest httpServletRequest; ! /** ! * @param wmRequest ! * @param results ! * @param request ! * @param replayUI ! */ public UIResults(WMRequest wmRequest, ResourceResults results, HttpServletRequest request, ReplayUI replayUI) { *************** *** 47,78 **** --- 78,134 ---- } + /** + * @return Timestamp end cutoff requested by user + */ public Timestamp getEndTimestamp() { return endTimestamp; } + /** + * @return first Timestamp in returned ResourceResults + */ public Timestamp getFirstResultTimestamp() { return firstResultTimestamp; } + /** + * @return last Timestamp in returned ResourceResults + */ public Timestamp getLastResultTimestamp() { return lastResultTimestamp; } + /** + * @return number of ResourceResult objects in response + */ public int getNumResults() { return numResults; } + /** + * @return URL or URL prefix requested by user + */ public String getSearchUrl() { return searchUrl; } + /** + * @return Timestamp start cutoff requested by user + */ public Timestamp getStartTimestamp() { return startTimestamp; } + /** + * @return Iterator of ResourceResults + */ public Iterator resultsIterator() { return results.iterator(); } + /** + * @param result + * @return URL string that will replay the specified Resource Result. 
+ */ public String resultToReplayUrl(ResourceResult result) { return replayUI.makeReplayURI(httpServletRequest, result); Index: SimpleQueryUI.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/simplequeryui/SimpleQueryUI.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** SimpleQueryUI.java 18 Oct 2005 02:30:49 -0000 1.1 --- SimpleQueryUI.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,24 ---- + /* SimpleQueryUI + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ package org.archive.wayback.simplequeryui; *************** *** 25,28 **** --- 47,57 ---- import org.archive.wayback.exception.WaybackException; + /** + * Trivial QueryUI HTTP implementation. Basic error types are reported, and very + * non-scalable HTML UI using dispatched JSP pages. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class SimpleQueryUI implements QueryUI, RequestParser { private final static String JSP_PATH = "queryui.jsppath"; *************** *** 36,42 **** private String jspPath = null; public SimpleQueryUI() { super(); - // TODO Auto-generated constructor stub } --- 65,73 ---- private String jspPath = null; + /** + * Constructor + */ public SimpleQueryUI() { super(); } *************** *** 49,53 **** public WMRequest parseRequest(HttpServletRequest request) { - // TODO Auto-generated method stub WMRequest wmRequest = null; Matcher matcher = null; --- 80,83 ---- *************** *** 71,75 **** wmRequest.setEndTimestamp(Timestamp.parseAfter(dateStr)); } catch (ParseException e1) { - // TODO Auto-generated catch block e1.printStackTrace(); return null; --- 101,104 ---- *************** *** 97,101 **** wmRequest.setEndTimestamp(Timestamp.parseAfter(dateStr)); } catch (ParseException e1) { - // TODO Auto-generated catch block e1.printStackTrace(); return null; --- 126,129 ---- *************** *** 183,186 **** --- 211,224 ---- } + /** + * Display a WaybackException message + * + * @param wmRequest + * @param request + * @param response + * @param message + * @throws IOException + * @throws ServletException + */ public void showWaybackException(WMRequest wmRequest, HttpServletRequest request, HttpServletResponse response, *************** *** 192,196 **** public void showNoMatches(WMRequest wmRequest, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! // TODO Auto-generated method stub request.setAttribute("results", wmRequest); String url = wmRequest.getRequestURI().getEscapedURI(); --- 230,234 ---- public void showNoMatches(WMRequest wmRequest, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! 
request.setAttribute("results", wmRequest); String url = wmRequest.getRequestURI().getEscapedURI(); *************** *** 203,209 **** } public void showError(String message, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! // TODO Auto-generated method stub request.setAttribute("message", message); --- 241,256 ---- } + /** + * Display a generic error message with simple template header+footer + * + * @param message + * @param request + * @param response + * @throws IOException + * @throws ServletException + */ public void showError(String message, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { ! request.setAttribute("message", message); *************** *** 211,214 **** --- 258,268 ---- } + /** + * @param request + * @param response + * @param jspName + * @throws ServletException + * @throws IOException + */ private void proxyRequest(HttpServletRequest request, HttpServletResponse response, final String jspName) *************** *** 226,230 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 280,283 ---- |
From: Brad <bra...@us...> - 2005-10-19 01:22:45
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java Modified Files: README.txt Log Message: lots of javadoc comments Index: README.txt =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/README.txt,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** README.txt 18 Oct 2005 02:30:49 -0000 1.1 --- README.txt 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 4,7 **** --- 4,8 ---- org.archive.wayback.core includes implementations for internal classes used within wayback + org.archive.wayback.exception includes skeleton for a few exceptions -- no real significance as yet... *************** *** 20,27 **** 2) IndexPipeline: uses multiple directories to store "flag" files while updating the CDX-BDB ! (to be completed... Gordon's leaving!) - \ No newline at end of file --- 21,66 ---- 2) IndexPipeline: uses multiple directories to store "flag" files while updating the CDX-BDB + currently it runs in a single thread which calls: + 1) queue new arcs (inspect ARC directory, filter those already in queuedDir + create a flag file in toBeIndexed) + 2) index new arcs (for each flag file in queueDir, create a CDX in mergeDir, + remove flag file) + 3) merge new cdx files (for each CDX in mergeDir, insert into BDB, remove CDX) + This could be split into several threads, one for each step, but is in a single thread now + for simplicity. This implementation could be changed to use interfaces that worked in both the + local "standalone" version as well as with a distributed ResourceStore, ResourceIndex, and a + pool of ArcIndexers. Not a rev. 1 feature.. ! org.archive.wayback.ippreplayui ! this is unused, but subclasses RawReplayUI to modify the document content before returning. ! For now, all it does is add a <DIV> at the very end of the page. ! ! 
org.archive.wayback.localbdbresourceindex ! includes 3 classes: ! 1) BDBResourceIndex: ! a very thin ResourceResults-specific wrapper around the BDB-JE library. ! 2) LocalBDBResourceIndex: ! actual ResourceIndex implementation, transforms query() into one of the specific BDB ! queries. ! 3) BDBResourceIndexWriter: ! super small module: importFile(CDXFile) ! ! org.archive.wayback.localresourcestore ! single class, transforms ARCLocation into a Resource with an ARCRecord (via ARCReader) ! ! org.archive.wayback.rawreplayui ! basic implementation of ReplayUI: ! parses Archival Urls ! transforms ResourceResults into ReplayUI Urls ! provides HTTP access to individual documents, including redirects to closest versions. ! no markup of resources ! ! org.archive.wayback.simplequeryui ! basic QueryUI implementation: ! trivial error display ! trivial UrlQuery and UrlPrefixQuery result HTML pages ! uses UIResults class to marshal result data to JSPs which actually draw the HTML ! the JSPs use header + footer templates for page consistency, and are very basic. \ No newline at end of file |
From: Brad <bra...@us...> - 2005-10-19 01:22:45
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/exception In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/exception Modified Files: WaybackException.java BadQueryException.java Log Message: lots of javadoc comments Index: WaybackException.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/exception/WaybackException.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** WaybackException.java 18 Oct 2005 02:30:49 -0000 1.1 --- WaybackException.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,4 **** --- 1,33 ---- + /* WaybackException + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.exception; + /** + * Base class for Wayback internal exceptions. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class WaybackException extends Exception { *************** *** 8,11 **** --- 37,45 ---- private static final long serialVersionUID = 1L; + /** + * Constructor + * + * @param message + */ public WaybackException(String message) { super(message); *************** *** 16,20 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 50,53 ---- Index: BadQueryException.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/exception/BadQueryException.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** BadQueryException.java 18 Oct 2005 02:30:49 -0000 1.1 --- BadQueryException.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,4 **** --- 1,33 ---- + /* BadQueryException + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.exception; + /** + * Exception for malformed user query. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class BadQueryException extends WaybackException { *************** *** 8,11 **** --- 37,45 ---- private static final long serialVersionUID = 1L; + /** + * Constructor + * + * @param message + */ public BadQueryException(String message) { super(message); *************** *** 16,20 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 50,53 ---- |
From: Brad <bra...@us...> - 2005-10-19 01:22:45
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localresourcestore In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/localresourcestore Modified Files: LocalARCResourceStore.java Log Message: lots of javadoc comments Index: LocalARCResourceStore.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/localresourcestore/LocalARCResourceStore.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** LocalARCResourceStore.java 18 Oct 2005 02:30:49 -0000 1.1 --- LocalARCResourceStore.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* LocalARCResourceStore + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.localresourcestore; *************** *** 11,14 **** --- 34,43 ---- import org.archive.wayback.core.Resource; + /** + * Implements ResourceStore using a local directory of ARC files. 
+ * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class LocalARCResourceStore implements ResourceStore { private static final String RESOURCE_PATH = "resourcestore.arcpath"; *************** *** 18,24 **** private String path = null; public LocalARCResourceStore() { super(); - // TODO Auto-generated constructor stub } --- 47,55 ---- private String path = null; + /** + * Constructor + */ public LocalARCResourceStore() { super(); } *************** *** 56,60 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } --- 87,90 ---- |
From: Brad <bra...@us...> - 2005-10-19 01:22:45
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/arcindexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/arcindexer Modified Files: IndexPipeline.java ArcIndexer.java Log Message: lots of javadoc comments Index: ArcIndexer.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/arcindexer/ArcIndexer.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ArcIndexer.java 18 Oct 2005 02:30:49 -0000 1.1 --- ArcIndexer.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ArcIndexer + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.arcindexer; *************** *** 17,28 **** import org.apache.commons.httpclient.Header; public class ArcIndexer { private final static String LOCATION_HTTP_HEADER = "Location"; public ArcIndexer() { super(); - // TODO Auto-generated constructor stub } public ResourceResults indexArc(final String arcPath) throws IOException { ResourceResults results = new ResourceResults(); --- 40,67 ---- import org.apache.commons.httpclient.Header; + /** + * Transforms an ARC file into ResourceResults, or a serialized ResourceResults + * file(CDX). + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class ArcIndexer { private final static String LOCATION_HTTP_HEADER = "Location"; + /** + * Constructor + */ public ArcIndexer() { super(); } + /** + * Create a ResourceResults representing the records in ARC file at arcPath. + * + * @param arcPath + * @return ResourceResults in arcPath. + * @throws IOException + */ public ResourceResults indexArc(final String arcPath) throws IOException { ResourceResults results = new ResourceResults(); *************** *** 31,35 **** arcReader.setParseHttpHeaders(true); // doh. this does not generate quite the columns we need: ! //arcReader.createCDXIndexFile(arcPath); Iterator itr = arcReader.iterator(); while (itr.hasNext()) { --- 70,74 ---- arcReader.setParseHttpHeaders(true); // doh. this does not generate quite the columns we need: ! 
// arcReader.createCDXIndexFile(arcPath); Iterator itr = arcReader.iterator(); while (itr.hasNext()) { *************** *** 39,47 **** result = arcRecordToResourceResult(rec, arc); } catch (NullPointerException e) { - // TODO Auto-generated catch block e.printStackTrace(); continue; } catch (ParseException e) { - // TODO Auto-generated catch block e.printStackTrace(); continue; --- 78,84 ---- *************** *** 81,85 **** } result.setRedirectUrl(redirectUrl); ! result.setTimeStamp(Timestamp.parseBefore(meta.getDate())); UURI uriCap = new UURI(meta.getUrl(), false); String searchHost = uriCap.getHostBasename(); --- 118,122 ---- } result.setRedirectUrl(redirectUrl); ! result.setTimestamp(Timestamp.parseBefore(meta.getDate())); UURI uriCap = new UURI(meta.getUrl(), false); String searchHost = uriCap.getHostBasename(); *************** *** 92,95 **** --- 129,139 ---- } + /** + * Write out ResourceResults into CDX file at cdxPath + * + * @param results + * @param cdxPath + * @throws IOException + */ public void serializeResults(final ResourceResults results, final String cdxPath) throws IOException { *************** *** 108,112 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub ArcIndexer indexer = new ArcIndexer(); String arc = args[0]; --- 152,155 ---- Index: IndexPipeline.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/arcindexer/IndexPipeline.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** IndexPipeline.java 18 Oct 2005 02:30:49 -0000 1.1 --- IndexPipeline.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* IndexPipeline + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). 
+ * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.arcindexer; *************** *** 11,18 **** import com.sun.org.apache.xml.internal.utils.StringToStringTable; public class IndexPipeline { private File arcDir = null; ! public File mergeDir = null; private File queuedDir = null; --- 34,48 ---- import com.sun.org.apache.xml.internal.utils.StringToStringTable; + /** + * Implements updating of a BDBResourceIndex using several directories with data + * files or flag files. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class IndexPipeline { private File arcDir = null; ! private File mergeDir = null; private File queuedDir = null; *************** *** 24,30 **** private ArcIndexer indexer = null; public IndexPipeline() { super(); - // TODO Auto-generated constructor stub } --- 54,62 ---- private ArcIndexer indexer = null; + /** + * Constructor + */ public IndexPipeline() { super(); } *************** *** 35,38 **** --- 67,78 ---- } + /** + * Initialize this object from several path arguments. 
+ * + * @param arcDir + * @param mergeDir + * @param workDir + * @throws IOException + */ public void init(final String arcDir, final String mergeDir, final String workDir) throws IOException { *************** *** 85,88 **** --- 125,133 ---- } + /** + * Find all new ARC files, and queue them for indexing. + * + * @throws IOException + */ public void queueNewArcs() throws IOException { ArrayList newArcs = getNewArcs(); *************** *** 96,99 **** --- 141,151 ---- } + /** + * Index any ARC files queued for indexing, queueing the resulting CDX files + * for merging with the BDBResourceIndex. + * + * @throws MalformedURLException + * @throws IOException + */ public void indexArcs() throws MalformedURLException, IOException { queueNewArcs(); *************** *** 130,136 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } } --- 182,194 ---- */ public static void main(String[] args) { } + /** + * @return Returns the mergeDir. + */ + public File getMergeDir() { + return mergeDir; + } + } |
From: Brad <bra...@us...> - 2005-10-19 01:22:45
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ippreplayui In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/ippreplayui Modified Files: InPagePresenceReplayUI.java Log Message: lots of javadoc comments Index: InPagePresenceReplayUI.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ippreplayui/InPagePresenceReplayUI.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** InPagePresenceReplayUI.java 18 Oct 2005 02:30:49 -0000 1.1 --- InPagePresenceReplayUI.java 19 Oct 2005 01:22:37 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* InPagePresenceReplayUI + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.ippreplayui; *************** *** 14,22 **** import org.archive.wayback.rawreplayui.RawReplayUI; public class InPagePresenceReplayUI extends RawReplayUI { public InPagePresenceReplayUI() { super(); - // TODO Auto-generated constructor stub } --- 37,55 ---- import org.archive.wayback.rawreplayui.RawReplayUI; + /** + * ReplayUI that inserts a DIV HTML tag at the end of a returned HTML document. + * Unused at present -- presently a proof of concept second ReplayUI + * implementation. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class InPagePresenceReplayUI extends RawReplayUI { + /** + * Constructor + */ public InPagePresenceReplayUI() { super(); } |
From: Michael S. <sta...@us...> - 2005-10-19 00:55:34
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27614/xdocs Modified Files: faq.fml Log Message: * xdocs/faq.fml note on need to rig up parse-ext if want pdfs to be indexed. Index: faq.fml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/faq.fml,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** faq.fml 6 Oct 2005 21:23:17 -0000 1.10 --- faq.fml 19 Oct 2005 00:55:25 -0000 1.11 *************** *** 219,223 **** --- 219,235 ---- </answer> </faq> + </part> + <part id="indexing"> + <title>Indexing</title> + <faq id="pdf"> + <question>Anything special to do indexing pdfs?</question> + <answer><p>Ensure ${NUTCHWAX}/plugins/parse-ext/plugin.xml + 'command' has full path to ${NUTCHWAX}/bin/parse-pdf.sh. + </p> + </answer> + </faq> + </part> + <part id="querying"> <title>Querying</title> |
From: Michael S. <sta...@us...> - 2005-10-18 23:21:21
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7054/xdocs Modified Files: srcbuild.xml Log Message: * project.properties * src/articles/releasenotes.xml * xdocs/srcbuild.xml Revert to 0.7.0 nutch. 0.7.1 has problems. * src/java/org/archive/access/nutch/Arc2Segment.java If we fail parse, don't add to index (Should get rid of those no arcoffset, etc., messages we used to get when indexing). * src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java Don't warn if 'encoding' not present -- won't be present for many types. Index: srcbuild.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/srcbuild.xml,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** srcbuild.xml 18 Oct 2005 19:34:53 -0000 1.7 --- srcbuild.xml 18 Oct 2005 23:21:11 -0000 1.8 *************** *** 23,31 **** <a href="http://www.apache.org/dyn/closer.cgi/lucene/nutch/">nutch downloads</a>. ! The below has been tested working using nutch 0.7.0 and 0.7.1. Revert to ! either of these versions of Nutch if problems building (Nutchwax will not ! work with ! release 0.6 of Nutch). Unbundle the nutch release It usually untars as ! nutch-0.?.?. The build scripts are looking for 'nutch' in the ${NUTCHWAX} directory so you need to either rename nutch directory as Nutch or make a symbolic link from --- 23,30 ---- <a href="http://www.apache.org/dyn/closer.cgi/lucene/nutch/">nutch downloads</a>. ! The below has been tested working using nutch 0.7.0. Revert to ! this version of Nutch if problems building (Nutchwax will not ! work with release 0.6 of Nutch). Unbundle the nutch release It usually ! untars as nutch-0.?.?. 
The build scripts are looking for 'nutch' in the ${NUTCHWAX} directory so you need to either rename nutch directory as Nutch or make a symbolic link from *************** *** 33,37 **** If building against 0.7.1, you'll need to create the directory <literal>${NUTCH_HOME}/src/plugins/nutch-extensionpoints/src/java</literal> ! else the nutch ant build fails. You'll may also have to update ${NUTCHWAX}/project.properties to rename the corenutch jar if building against a nutch that is other than that which nutchwax is currently working --- 32,38 ---- If building against 0.7.1, you'll need to create the directory <literal>${NUTCH_HOME}/src/plugins/nutch-extensionpoints/src/java</literal> ! else the nutch ant build fails (There seem to be other issues running ! nutchwax against nutch 0.7.1 that need looking into). ! You'll may also have to update ${NUTCHWAX}/project.properties to rename the corenutch jar if building against a nutch that is other than that which nutchwax is currently working |
From: Michael S. <sta...@us...> - 2005-10-18 23:21:21
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7054/src/java/org/archive/access/nutch Modified Files: Arc2Segment.java Log Message: * project.properties * src/articles/releasenotes.xml * xdocs/srcbuild.xml Revert to 0.7.0 nutch. 0.7.1 has problems. * src/java/org/archive/access/nutch/Arc2Segment.java If we fail parse, don't add to index (Should get rid of those no arcoffset, etc., messages we used to get when indexing). * src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java Don't warn if 'encoding' not present -- won't be present for many types. Index: Arc2Segment.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/Arc2Segment.java,v retrieving revision 1.29 retrieving revision 1.30 diff -C2 -d -r1.29 -r1.30 *** Arc2Segment.java 20 Aug 2005 00:09:36 -0000 1.29 --- Arc2Segment.java 18 Oct 2005 23:21:11 -0000 1.30 *************** *** 253,262 **** LOG.info("Failed parse: " + p.getData().getStatus().getMessage()); } - // FetchList.append(fle); - this.fetcher.append(fo); - // Content.append(c); - this.parseText.append(new ParseText(p.getText())); - this.parseData.append(p.getData()); } } catch (ParseException e) { --- 253,264 ---- LOG.info("Failed parse: " + p.getData().getStatus().getMessage()); + // Don't add if failed parse. + } else { + // FetchList.append(fle); + this.fetcher.append(fo); + // Content.append(c); + this.parseText.append(new ParseText(p.getText())); + this.parseData.append(p.getData()); } } } catch (ParseException e) { |
From: Michael S. <sta...@us...> - 2005-10-18 23:21:20
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/articles In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7054/src/articles Modified Files: releasenotes.xml Log Message: * project.properties * src/articles/releasenotes.xml * xdocs/srcbuild.xml Revert to 0.7.0 nutch. 0.7.1 has problems. * src/java/org/archive/access/nutch/Arc2Segment.java If we fail parse, don't add to index (Should get rid of those no arcoffset, etc., messages we used to get when indexing). * src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java Don't warn if 'encoding' not present -- won't be present for many types. Index: releasenotes.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/articles/releasenotes.xml,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** releasenotes.xml 17 Oct 2005 20:57:03 -0000 1.1 --- releasenotes.xml 18 Oct 2005 23:21:11 -0000 1.2 *************** *** 18,21 **** --- 18,25 ---- <para>TODO</para> </abstract> + <para>NutchWAX has been built against Nutch 0.7.0 (There seem to be issues + with 0.7.1 build, and then some, so have not built against the 0.7.1 + release). + </para> <sect2 id="0_4_0_limitations"> |
From: Michael S. <sta...@us...> - 2005-10-18 23:21:20
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7054/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer Modified Files: IaIndexingFilter.java Log Message: * project.properties * src/articles/releasenotes.xml * xdocs/srcbuild.xml Revert to 0.7.0 nutch. 0.7.1 has problems. * src/java/org/archive/access/nutch/Arc2Segment.java If we fail parse, don't add to index (Should get rid of those no arcoffset, etc., messages we used to get when indexing). * src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java Don't warn if 'encoding' not present -- won't be present for many types. Index: IaIndexingFilter.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** IaIndexingFilter.java 6 Oct 2005 21:23:17 -0000 1.21 --- IaIndexingFilter.java 18 Oct 2005 23:21:11 -0000 1.22 *************** *** 112,118 **** LOGGER.info("No metadata for " + doc.toString()); } else { ! // Add as stored, unindexed, and untokenized. add(url, doc, "encoding", p.getProperty(ENCODING_KEY), ! false, true, true, false); // Add as stored, indexed, and untokenized. add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY), --- 112,119 ---- LOGGER.info("No metadata for " + doc.toString()); } else { ! // Add as stored, unindexed, and untokenized. Don't warn if absent. ! // Its not a tradegy. add(url, doc, "encoding", p.getProperty(ENCODING_KEY), ! false, true, true, false, false); // Add as stored, indexed, and untokenized. 
add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY), *************** *** 177,182 **** boolean lowerCase, boolean store, boolean index, boolean tokenize) { if (fieldValue == null || fieldValue.length() <= 0) { ! LOGGER.warning("No " + fieldName + " for url " + url); return; } --- 178,193 ---- boolean lowerCase, boolean store, boolean index, boolean tokenize) { + add(url, doc, fieldName, fieldValue, lowerCase, store, index, tokenize, + true); + } + + private void add(final String url, final Document doc, + final String fieldName, final String fieldValue, + boolean lowerCase, boolean store, boolean index, + boolean tokenize, final boolean warn) { if (fieldValue == null || fieldValue.length() <= 0) { ! if (warn) { ! LOGGER.warning("No " + fieldName + " for url " + url); ! } return; } |
From: Michael S. <sta...@us...> - 2005-10-18 23:21:20
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7054 Modified Files: project.properties Log Message: * project.properties * src/articles/releasenotes.xml * xdocs/srcbuild.xml Revert to 0.7.0 nutch. 0.7.1 has problems. * src/java/org/archive/access/nutch/Arc2Segment.java If we fail parse, don't add to index (Should get rid of those no arcoffset, etc., messages we used to get when indexing). * src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java Don't warn if 'encoding' not present -- won't be present for many types. Index: project.properties =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/project.properties,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** project.properties 18 Oct 2005 19:34:52 -0000 1.14 --- project.properties 18 Oct 2005 23:21:11 -0000 1.15 *************** *** 18,22 **** # Local jars to add to classpath. maven.jar.override = on ! maven.jar.corenutch = ${basedir}/nutch/build/nutch-0.7.1.jar maven.jar.lucene = ${basedir}/nutch/lib/lucene-1.9-rc1-dev.jar maven.jar.arc = ${basedir}/lib/arc-1.5.1-200508191341.jar --- 18,22 ---- # Local jars to add to classpath. maven.jar.override = on ! maven.jar.corenutch = ${basedir}/nutch/build/nutch-0.7.jar maven.jar.lucene = ${basedir}/nutch/lib/lucene-1.9-rc1-dev.jar maven.jar.arc = ${basedir}/lib/arc-1.5.1-200508191341.jar |
From: Michael S. <sta...@us...> - 2005-10-18 19:38:47
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8042/xdocs Modified Files: srcbuild.xml Log Message: * project.properties Point at nutch-0.7.1. That's what we'll release against. * xdocs/srcbuild.xml More edits of src build notes. Index: srcbuild.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/srcbuild.xml,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** srcbuild.xml 18 Oct 2005 02:20:55 -0000 1.6 --- srcbuild.xml 18 Oct 2005 19:34:53 -0000 1.7 *************** *** 23,28 **** <a href="http://www.apache.org/dyn/closer.cgi/lucene/nutch/">nutch downloads</a>. ! The below has been tested working using nutch 0.7.0 and 0.7.1. Revert to ! this version of Nutch if problems building (Nutchwax will not work with release 0.6 of Nutch). Unbundle the nutch release It usually untars as nutch-0.?.?. The build scripts are looking for 'nutch' in --- 23,29 ---- <a href="http://www.apache.org/dyn/closer.cgi/lucene/nutch/">nutch downloads</a>. ! The below has been tested working using nutch 0.7.0 and 0.7.1. Revert to ! either of these versions of Nutch if problems building (Nutchwax will not ! work with release 0.6 of Nutch). Unbundle the nutch release It usually untars as nutch-0.?.?. The build scripts are looking for 'nutch' in *************** *** 32,50 **** If building against 0.7.1, you'll need to create the directory <literal>${NUTCH_HOME}/src/plugins/nutch-extensionpoints/src/java</literal> ! else the nutch ant build fails. You'll also have to update ! ${NUTCHWAX}/project.properties to point at the nutch 0.7.1 jar rather ! than at the 0.7.0 jar. </p> ! <p>Symlink ${NUTCHWAX}/nutch/conf/nutch-site.xml to ${NUTCHWAX}/conf/nutch-site.xml. Doing this, there is only one nutch-site.xml shared by core Nutch and by NutchWAX extensions. ! 
<pre> % cd ${NUTCHWAX}/nutch/conf % mv nutch-site.xml nutch-site.xml.original ! % ln -s ${NUTCHWAX}/conf/nutch-site.xml</pre> ! The version of nutch-site.xml that is in ${NUTCHWAX} has NutchWAX specific ! configuration overrides as well as hardcodings of collection names and the ! name ! of the archive host that holds archived pages. Edit these to suit your environment particularly 'archive.host' and 'archive.collection'. The NutchWAX search.jsp compounds these values to come up with the WAC URL --- 33,51 ---- If building against 0.7.1, you'll need to create the directory <literal>${NUTCH_HOME}/src/plugins/nutch-extensionpoints/src/java</literal> ! else the nutch ant build fails. You'll may also have to update ! ${NUTCHWAX}/project.properties to rename the corenutch jar if building ! against a nutch that is other than that which nutchwax is currently working ! against. </p> ! <p>Symlink <literal>${NUTCHWAX}/nutch/conf/nutch-site.xml.all</literal> to ${NUTCHWAX}/conf/nutch-site.xml. Doing this, there is only one nutch-site.xml shared by core Nutch and by NutchWAX extensions. ! <pre> % cd ${NUTCH_HOME}/nutch/conf % mv nutch-site.xml nutch-site.xml.original ! % ln -s ${NUTCHWAX}/conf/nutch-site.xml.all nutch-site.xml</pre> ! The <literal>nutch-site.xml.all</literal> that is in ${NUTCHWAX} has NutchWAX ! specific configuration overrides as well as hardcodings of collection names and ! the name of the archive host that holds archived pages. Edit these to suit your environment particularly 'archive.host' and 'archive.collection'. The NutchWAX search.jsp compounds these values to come up with the WAC URL |
From: Michael S. <sta...@us...> - 2005-10-18 19:35:08
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8042 Modified Files: project.properties Log Message: * project.properties Point at nutch-0.7.1. That's what we'll release against. * xdocs/srcbuild.xml More edits of src build notes. Index: project.properties =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/project.properties,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** project.properties 2 Sep 2005 01:08:18 -0000 1.13 --- project.properties 18 Oct 2005 19:34:52 -0000 1.14 *************** *** 18,22 **** # Local jars to add to classpath. maven.jar.override = on ! maven.jar.corenutch = ${basedir}/nutch/build/nutch-0.7.jar maven.jar.lucene = ${basedir}/nutch/lib/lucene-1.9-rc1-dev.jar maven.jar.arc = ${basedir}/lib/arc-1.5.1-200508191341.jar --- 18,22 ---- # Local jars to add to classpath. maven.jar.override = on ! maven.jar.corenutch = ${basedir}/nutch/build/nutch-0.7.1.jar maven.jar.lucene = ${basedir}/nutch/lib/lucene-1.9-rc1-dev.jar maven.jar.arc = ${basedir}/lib/arc-1.5.1-200508191341.jar |
From: Brad <bra...@us...> - 2005-10-18 02:31:10
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/webapp/jsp/ReplayUI In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/webapp/jsp/ReplayUI Added Files: ErrorResult.jsp Log Message: Initial check-in -- pre code review --- NEW FILE: ErrorResult.jsp --- <jsp:include page="../template/UI-header.txt" /> <B><%= (String) request.getAttribute("message") %></B> <jsp:include page="../template/UI-footer.txt" /> |
From: Brad <bra...@us...> - 2005-10-18 02:31:10
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/webapp/WEB-INF In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/webapp/WEB-INF Added Files: web.xml .cvsignore Log Message: Initial check-in -- pre code review --- NEW FILE: .cvsignore --- classes --- NEW FILE: web.xml --- <?xml version="1.0"?> <!DOCTYPE web-app PUBLIC "-//Sun Microsystems, Inc.//DTD Web Application 2.3//EN" "http://java.sun.com/dtd/web-app_2_3.dtd"> <web-app> <servlet> <servlet-name>RetrievalServlet</servlet-name> <servlet-class>org.archive.wayback.servletglue.WBReplayUIServlet</servlet-class> <init-param> <param-name>UNUSED-replayui.class</param-name> <param-value>org.archive.wayback.ippreplayui.InPagePresenceReplayUI</param-value> </init-param> <init-param> <param-name>replayui.class</param-name> <param-value>org.archive.wayback.rawreplayui.RawReplayUI</param-value> </init-param> <init-param> <param-name>replayui.jsppath</param-name> <param-value>jsp/ReplayUI</param-value> </init-param> <init-param> <param-name>queryui.class</param-name> <param-value>org.archive.wayback.simplequeryui.SimpleQueryUI</param-value> </init-param> <init-param> <param-name>queryui.jsppath</param-name> <param-value>jsp/QueryUI</param-value> </init-param> <init-param> <param-name>resourcestore.class</param-name> <param-value>org.archive.wayback.localresourcestore.LocalARCResourceStore</param-value> </init-param> <init-param> <param-name>resourcestore.arcpath</param-name> <param-value>/home/brad/test-arc3</param-value> </init-param> <init-param> <param-name>resourceindex.class</param-name> <param-value>org.archive.wayback.localbdbresourceindex.LocalBDBResourceIndex</param-value> </init-param> <init-param> <param-name>resourceindex.indexPath</param-name> <param-value>/tmp/test-db</param-value> </init-param> <init-param> <param-name>resourceindex.dbName</param-name> <param-value>DB1</param-value> </init-param> <init-param> <param-name>resourceindex.arcPath</param-name> 
<param-value>/home/brad/test-arc3</param-value> </init-param> <init-param> <param-name>resourceindex.workPath</param-name> <param-value>/tmp/index-pipeline</param-value> </init-param> <init-param> <param-name>resourceindex.runPipeline</param-name> <param-value>1</param-value> </init-param> </servlet> <servlet-mapping> <servlet-name>RetrievalServlet</servlet-name> <url-pattern>/retrieve</url-pattern> </servlet-mapping> <servlet> <servlet-name>QueryServlet</servlet-name> <servlet-class>org.archive.wayback.servletglue.WBQueryUIServlet</servlet-class> <init-param> <param-name>UNUSED-replayui.class</param-name> <param-value>org.archive.wayback.ippreplayui.InPagePresenceReplayUI</param-value> </init-param> <init-param> <param-name>replayui.class</param-name> <param-value>org.archive.wayback.rawreplayui.RawReplayUI</param-value> </init-param> <init-param> <param-name>replayui.jsppath</param-name> <param-value>jsp/ReplayUI</param-value> </init-param> <init-param> <param-name>queryui.class</param-name> <param-value>org.archive.wayback.simplequeryui.SimpleQueryUI</param-value> </init-param> <init-param> <param-name>queryui.jsppath</param-name> <param-value>jsp/QueryUI</param-value> </init-param> <init-param> <param-name>resourcestore.class</param-name> <param-value>org.archive.wayback.localresourcestore.LocalARCResourceStore</param-value> </init-param> <init-param> <param-name>resourcestore.arcpath</param-name> <param-value>/home/brad/test-arc3</param-value> </init-param> <init-param> <param-name>resourceindex.class</param-name> <param-value>org.archive.wayback.localbdbresourceindex.LocalBDBResourceIndex</param-value> </init-param> <init-param> <param-name>resourceindex.indexPath</param-name> <param-value>/tmp/test-db</param-value> </init-param> <init-param> <param-name>resourceindex.dbName</param-name> <param-value>DB1</param-value> </init-param> <init-param> <param-name>resourceindex.arcPath</param-name> <param-value>/home/brad/test-arc3</param-value> </init-param> 
<init-param> <param-name>resourceindex.workPath</param-name> <param-value>/tmp/index-pipeline</param-value> </init-param> <init-param> <param-name>resourceindex.runPipeline</param-name> <param-value>1</param-value> </init-param> </servlet> <servlet-mapping> <servlet-name>QueryServlet</servlet-name> <url-pattern>/query</url-pattern> </servlet-mapping> <filter> <filter-name>RetrievalFilter</filter-name> <filter-class>org.archive.wayback.servletglue.RequestFilter</filter-class> <init-param> <param-name>requestParser.class</param-name> <param-value>org.archive.wayback.rawreplayui.RawReplayUI</param-value> </init-param> <init-param> <param-name>handler.url</param-name> <param-value>/retrieve</param-value> </init-param> </filter> <filter-mapping> <filter-name>RetrievalFilter</filter-name> <url-pattern>/*</url-pattern> </filter-mapping> <filter> <filter-name>QueryFilter</filter-name> <filter-class>org.archive.wayback.servletglue.RequestFilter</filter-class> <init-param> <param-name>requestParser.class</param-name> <param-value>org.archive.wayback.simplequeryui.SimpleQueryUI</param-value> </init-param> <init-param> <param-name>handler.url</param-name> <param-value>/query</param-value> </init-param> </filter> <filter-mapping> <filter-name>QueryFilter</filter-name> <url-pattern>/*</url-pattern> </filter-mapping> </web-app> |
From: Brad <bra...@us...> - 2005-10-18 02:31:10
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/simplequeryui
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/java/org/archive/wayback/simplequeryui

Added Files:
	SimpleQueryUI.java UIResults.java
Log Message:
Initial check-in -- pre code review

--- NEW FILE: UIResults.java ---
package org.archive.wayback.simplequeryui;

import java.util.Iterator;

import javax.servlet.http.HttpServletRequest;

import org.archive.wayback.ReplayUI;
import org.archive.wayback.core.ResourceResult;
import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.Timestamp;
import org.archive.wayback.core.WMRequest;

/**
 * Read-only view of a query request and its results. SimpleQueryUI stores an
 * instance in the "ui-results" request attribute, from which the query JSPs
 * read it.
 */
public class UIResults {
	private final String searchUrl;

	private final Timestamp startTimestamp;

	private final Timestamp endTimestamp;

	private final Timestamp firstResultTimestamp;

	private final Timestamp lastResultTimestamp;

	private final int numResults;

	private final ResourceResults results;

	private final ReplayUI replayUI;

	private final HttpServletRequest httpServletRequest;

	/**
	 * Captures the values the JSPs need from the request and its results.
	 *
	 * @param wmRequest request supplying the search URL and the start/end
	 *            timestamps
	 * @param results results supplying the first/last timestamps and the count
	 * @param request servlet request later passed to the ReplayUI when
	 *            building replay URLs
	 * @param replayUI used to turn a single result into a replay URL
	 */
	public UIResults(WMRequest wmRequest, ResourceResults results,
			HttpServletRequest request, ReplayUI replayUI) {
		this.searchUrl = wmRequest.getRequestURI().getEscapedURI();
		this.startTimestamp = wmRequest.getStartTimestamp();
		this.endTimestamp = wmRequest.getEndTimestamp();
		this.firstResultTimestamp = results.firstTimestamp();
		this.lastResultTimestamp = results.lastTimestamp();
		this.numResults = results.getNumResults();
		this.results = results;
		this.replayUI = replayUI;
		this.httpServletRequest = request;
	}

	/** @return end timestamp taken from the request */
	public Timestamp getEndTimestamp() {
		return endTimestamp;
	}

	/** @return value of results.firstTimestamp() at construction time */
	public Timestamp getFirstResultTimestamp() {
		return firstResultTimestamp;
	}

	/** @return value of results.lastTimestamp() at construction time */
	public Timestamp getLastResultTimestamp() {
		return lastResultTimestamp;
	}

	/** @return value of results.getNumResults() at construction time */
	public int getNumResults() {
		return numResults;
	}

	/** @return escaped form of the requested URI */
	public String getSearchUrl() {
		return searchUrl;
	}

	/** @return start timestamp taken from the request */
	public Timestamp getStartTimestamp() {
		return startTimestamp;
	}

	/** @return iterator over the wrapped results (ResourceResult elements) */
	public Iterator resultsIterator() {
		return results.iterator();
	}

	/**
	 * @param result a single result from this result set
	 * @return replay URL for the result, as produced by the ReplayUI
	 */
	public String resultToReplayUrl(ResourceResult result) {
		return replayUI.makeReplayURI(httpServletRequest, result);
	}
}

--- NEW FILE: SimpleQueryUI.java ---
package org.archive.wayback.simplequeryui;

import java.io.IOException;
import java.text.ParseException;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.httpclient.URIException;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.wayback.QueryUI;
import org.archive.wayback.ReplayUI;
import org.archive.wayback.RequestParser;
import org.archive.wayback.ResourceIndex;
import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.Timestamp;
import org.archive.wayback.core.WMRequest;
import org.archive.wayback.core.WaybackLogic;
import org.archive.wayback.exception.WaybackException;

/**
 * QueryUI and RequestParser implementation that recognises
 * "/DATE*&#47;URL" (query) and "/DATE*&#47;URL*" (path query) request paths,
 * runs them against the ResourceIndex and forwards to JSPs for rendering.
 */
public class SimpleQueryUI implements QueryUI, RequestParser {

	/** Name of the property holding the directory of the query JSPs. */
	private final static String JSP_PATH = "queryui.jsppath";

	/** Up to 13 digits, "*", "/", then a URL that does not end in "*". */
	private final static Pattern WB_QUERY_REGEX = Pattern
			.compile("^/(\\d{0,13})\\*/(.*[^*])$");

	/** Up to 13 digits, "*", "/", then a URL prefix followed by "*". */
	private final static Pattern WB_PATH_QUERY_REGEX = Pattern
			.compile("^/(\\d{0,13})\\*/(.*)\\*$");

	private String jspPath = null;

	/** Creates an unconfigured instance; call init() before use. */
	public SimpleQueryUI() {
		super();
	}

	/**
	 * Reads the JSP directory from the "queryui.jsppath" property.
	 *
	 * @param p configuration properties
	 * @throws IllegalArgumentException if the property is missing or empty
	 */
	public void init(Properties p) throws IOException {
		// getProperty() also consults the Properties' defaults, unlike get()
		this.jspPath = p.getProperty(JSP_PATH);
		if (this.jspPath == null || this.jspPath.length() <= 0) {
			throw new IllegalArgumentException("Failed to find " + JSP_PATH);
		}
	}

	/**
	 * Attempts to interpret the request path (relative to the context path)
	 * as a query or a path query.
	 *
	 * @param request incoming HTTP request
	 * @return the parsed request, or null if the path is not a query, the date
	 *         cannot be parsed, or the URL cannot be turned into a UURI
	 */
	public WMRequest parseRequest(HttpServletRequest request) {
		String origRequestPath = request.getRequestURI();
		String contextPath = request.getContextPath();
		if (!origRequestPath.startsWith(contextPath)) {
			return null;
		}
		String requestPath = origRequestPath.substring(contextPath.length());

		// The plain query pattern is tried first; it rejects a trailing '*',
		// so the two patterns cannot both match the same path.
		Matcher matcher = WB_QUERY_REGEX.matcher(requestPath);
		if (matcher.matches()) {
			return buildRequest(matcher.group(1), matcher.group(2), false);
		}
		matcher = WB_PATH_QUERY_REGEX.matcher(requestPath);
		if (matcher.matches()) {
			return buildRequest(matcher.group(1), matcher.group(2), true);
		}
		return null;
	}

	/**
	 * Builds a WMRequest from the date and URL captured by one of the query
	 * patterns.
	 *
	 * @param dateStr possibly partial (or empty) timestamp digits
	 * @param urlStr requested URL; "http://" is prepended when absent
	 * @param pathQuery true to mark the request as a path query, false for a
	 *            plain query
	 * @return the populated request, or null if the date or URL is invalid
	 */
	private WMRequest buildRequest(String dateStr, String urlStr,
			boolean pathQuery) {
		WMRequest wmRequest = new WMRequest();
		try {
			wmRequest.setStartTimestamp(Timestamp.parseBefore(dateStr));
			wmRequest.setEndTimestamp(Timestamp.parseAfter(dateStr));
		} catch (ParseException e) {
			e.printStackTrace();
			return null;
		}
		if (pathQuery) {
			wmRequest.setPathQuery();
		} else {
			wmRequest.setQuery();
		}
		if (!urlStr.startsWith("http://")) {
			urlStr = "http://" + urlStr;
		}
		try {
			UURI requestURI = UURIFactory.getInstance(urlStr);
			wmRequest.setRequestURI(requestURI);
		} catch (URIException e) {
			return null;
		}
		return wmRequest;
	}

	/**
	 * Runs the query against the index and renders the outcome: an error page
	 * if the index throws, a "no matches" page for an empty result, otherwise
	 * the results page matching the query type.
	 *
	 * @param wayback provides the ResourceIndex and ReplayUI
	 * @param wmRequest parsed query request
	 * @param request servlet request
	 * @param response servlet response
	 * @throws IOException if the index or the forwarded JSP fails with one
	 * @throws ServletException if forwarding to a JSP fails
	 */
	public void handle(WaybackLogic wayback, WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException {

		ResourceIndex idx = wayback.getResourceIndex();
		ResourceResults results;
		try {
			results = idx.query(wmRequest);
		} catch (WaybackException e) {
			// record the failure first so it is not lost if rendering throws
			e.printStackTrace();
			showWaybackException(wmRequest, request, response, e.getMessage());
			return;
		}
		if (results.isEmpty()) {
			// ServletException is part of this method's contract, so it is
			// propagated intact instead of being flattened into an
			// IOException that carries only the message.
			showNoMatches(wmRequest, request, response);
			return;
		}
		if (wmRequest.isQuery()) {
			showQueryResults(wayback, request, response, wmRequest, results);
		} else if (wmRequest.isPathQuery()) {
			showPathQueryResults(wayback, request, response, wmRequest,
					results);
		} else {
			showWaybackException(wmRequest, request, response,
					"Unknown query type error");
		}
	}

	/** Forwards the results of a plain query to QueryResults.jsp. */
	public void showQueryResults(WaybackLogic wayback,
			HttpServletRequest request, HttpServletResponse response,
			WMRequest wmRequest, ResourceResults results) throws IOException,
			ServletException {
		showResults(wayback, request, response, wmRequest, results,
				"QueryResults.jsp");
	}

	/** Forwards the results of a path query to PathQueryResults.jsp. */
	public void showPathQueryResults(WaybackLogic wayback,
			HttpServletRequest request, HttpServletResponse response,
			WMRequest wmRequest, ResourceResults results) throws IOException,
			ServletException {
		showResults(wayback, request, response, wmRequest, results,
				"PathQueryResults.jsp");
	}

	/**
	 * Wraps the results in a UIResults, exposes it as the "ui-results"
	 * request attribute and forwards to the named JSP.
	 */
	private void showResults(WaybackLogic wayback, HttpServletRequest request,
			HttpServletResponse response, WMRequest wmRequest,
			ResourceResults results, String jspName) throws IOException,
			ServletException {
		ReplayUI replayUI = wayback.getReplayUI();
		UIResults uiResults = new UIResults(wmRequest, results, request,
				replayUI);
		request.setAttribute("ui-results", uiResults);
		proxyRequest(request, response, jspName);
	}

	/** Shows the error page with an "index not available" message. */
	public void showIndexNotAvailable(WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException {
		showError("Unexpected Exception: Index not available", request,
				response);
	}

	/** Shows the error page with the given exception message. */
	public void showWaybackException(WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response,
			String message) throws IOException, ServletException {
		showError("Unexpected Exception: " + message, request, response);
	}

	/**
	 * Shows the error page with a message naming the URL and date range that
	 * produced no matches. The request itself is also exposed as the
	 * "results" request attribute.
	 */
	public void showNoMatches(WMRequest wmRequest, HttpServletRequest request,
			HttpServletResponse response) throws IOException, ServletException {
		request.setAttribute("results", wmRequest);

		String url = wmRequest.getRequestURI().getEscapedURI();
		String prettyStart = wmRequest.getStartTimestamp().prettyDateTime();
		String prettyEnd = wmRequest.getEndTimestamp().prettyDateTime();
		String message = "No matches for query " + url + " between "
				+ prettyStart + " and " + prettyEnd;
		showError(message, request, response);
	}

	/**
	 * Stores the message in the "message" request attribute and forwards to
	 * ErrorResult.jsp.
	 */
	public void showError(String message, HttpServletRequest request,
			HttpServletResponse response) throws IOException, ServletException {
		request.setAttribute("message", message);
		proxyRequest(request, response, "ErrorResult.jsp");
	}

	/** Forwards the request to the named JSP inside the configured jspPath. */
	private void proxyRequest(HttpServletRequest request,
			HttpServletResponse response, final String jspName)
			throws ServletException, IOException {
		String finalJspPath = jspPath + "/" + jspName;
		RequestDispatcher dispatcher = request
				.getRequestDispatcher(finalJspPath);
		dispatcher.forward(request, response);
	}

	/**
	 * Intentionally empty entry point.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
	}
}
|
From: Brad <bra...@us...> - 2005-10-18 02:31:10
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/ippreplayui
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/java/org/archive/wayback/ippreplayui

Added Files:
	InPagePresenceReplayUI.java
Log Message:
Initial check-in -- pre code review

--- NEW FILE: InPagePresenceReplayUI.java ---
package org.archive.wayback.ippreplayui;

import java.io.IOException;

import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.io.arc.ARCRecord;
import org.archive.wayback.core.Resource;
import org.archive.wayback.core.ResourceResult;
import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.WMRequest;
import org.archive.wayback.rawreplayui.RawReplayUI;

/**
 * RawReplayUI variant that appends an "in page presence" DIV to replayed
 * text/html documents. Every other document is replayed by the superclass.
 */
public class InPagePresenceReplayUI extends RawReplayUI {

	/**
	 * Charset used to view the document bytes as characters while editing.
	 * ISO-8859-1 maps each byte to exactly one char and back, so the archived
	 * bytes pass through unchanged whatever encoding the page really uses.
	 */
	private static final String BYTE_PRESERVING_CHARSET = "ISO-8859-1";

	/** Creates the UI; configuration is inherited from RawReplayUI. */
	public InPagePresenceReplayUI() {
		super();
	}

	/**
	 * @param result index entry for the document being replayed
	 * @return true if the mime type does not contain "text/html", i.e. the
	 *         document should be replayed without modification
	 */
	private boolean isRawReplayResult(ResourceResult result) {
		return result.getMimeType().indexOf("text/html") == -1;
	}

	/**
	 * Replays the resource, appending the IPP markup when it is an HTML page.
	 *
	 * @param wmRequest parsed replay request
	 * @param result index entry for the document being replayed
	 * @param resource the archived document
	 * @param request servlet request
	 * @param response servlet response the document is written to
	 * @param results all index matches for the request
	 * @throws IllegalArgumentException if resource or result is null
	 * @throws IOException if reading the record or writing the response fails
	 */
	public void replayResource(WMRequest wmRequest, ResourceResult result,
			Resource resource, HttpServletRequest request,
			HttpServletResponse response, ResourceResults results)
			throws IOException {

		if (resource == null) {
			throw new IllegalArgumentException("No resource");
		}
		if (result == null) {
			throw new IllegalArgumentException("No result");
		}
		if (isRawReplayResult(result)) {
			super.replayResource(wmRequest, result, resource, request,
					response, results);
			return;
		}

		ARCRecord record = resource.getArcRecord();
		record.skipHttpHeader();
		copyRecordHttpHeader(response, record, true);

		// Slurp the whole body into RAM as raw bytes. Decoding chunk by chunk
		// would mangle characters that straddle a chunk boundary and would
		// confuse byte counts with char counts.
		java.io.ByteArrayOutputStream raw = new java.io.ByteArrayOutputStream();
		byte[] chunk = new byte[4 * 1024];
		for (int n; (n = record.read(chunk, 0, chunk.length)) != -1;) {
			raw.write(chunk, 0, n);
		}

		StringBuffer page = new StringBuffer(raw
				.toString(BYTE_PRESERVING_CHARSET));
		insertIPP(page, result, results);

		// Content-Length counts bytes, so measure the encoded body, and write
		// bytes rather than print(String), which rejects non-Latin-1 chars.
		byte[] body = page.toString().getBytes(BYTE_PRESERVING_CHARSET);
		response.setHeader("Content-Length", String.valueOf(body.length));
		ServletOutputStream out = response.getOutputStream();
		out.write(body);
	}

	/** Inserts the IPP markup into the page at the computed insert point. */
	private void insertIPP(StringBuffer page, ResourceResult result,
			ResourceResults results) {
		int idx = findIPPInsertPoint(page);
		String ippInsert = makeIPPInsert(result, results);
		page.insert(idx, ippInsert);
	}

	/**
	 * @param page the document being modified
	 * @return offset at which to insert the markup; currently always the end
	 *         of the document
	 */
	private int findIPPInsertPoint(StringBuffer page) {
		return page.length();
	}

	/**
	 * @param result index entry for the replayed document (currently unused)
	 * @param results all index matches (currently unused)
	 * @return the markup to insert; currently a fixed placeholder DIV
	 */
	private String makeIPPInsert(ResourceResult result, ResourceResults results) {
		StringBuffer ippInsert = new StringBuffer();
		ippInsert.append("<DIV NAME=\"iawm_ipp\">");
		ippInsert.append("IPP");
		ippInsert.append("</DIV>");
		return ippInsert.toString();
	}
}
|
From: Brad <bra...@us...> - 2005-10-18 02:31:10
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/java Added Files: README.txt Log Message: Initial check-in -- pre code review --- NEW FILE: README.txt --- wayback Overview: org.archive.wayback includes primary interfaces for Wayback components org.archive.wayback.core includes implementations for internal classes used within wayback org.archive.wayback.exception includes skeleton for a few exceptions -- no real significance as yet... org.archive.wayback.servletglue includes 3 primary interfaces: 1) RequestFilter: attempts to parse incoming HTTP requests, and forward to correct servlet. 2) WBQueryUIServlet: teeny glue that calls handle() on QueryUI for query requests 3) WBReplayUIServlet: teeny glue that calls handle() on ReplayUI for replay requests org.archive.wayback.arcindexer includes 2 classes: 1) ArcIndexer: transforms ARC file into CDX file 2) IndexPipeline: uses multiple directories to store "flag" files while updating the CDX-BDB (to be completed... Gordon's leaving!) |
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/java/org/archive/wayback

Added Files:
	ResourceStore.java ResourceIndex.java ReplayUI.java QueryUI.java
	RequestParser.java
Log Message:
Initial check-in -- pre code review

--- NEW FILE: QueryUI.java ---
package org.archive.wayback;

import java.io.IOException;
import java.util.Properties;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.WMRequest;
import org.archive.wayback.core.WaybackLogic;

/**
 * User interface for query requests: handles a parsed request and renders
 * its results or an error page.
 */
public interface QueryUI {

	/**
	 * Configures this UI.
	 *
	 * @param p configuration properties
	 * @throws IOException declared for implementations that need it
	 */
	void init(Properties p) throws IOException;

	/**
	 * Handles a query request.
	 *
	 * @param wayback access to the other Wayback components
	 * @param wmRequest parsed request
	 * @param request servlet request
	 * @param response servlet response
	 */
	void handle(WaybackLogic wayback, WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException;

	/** Renders the results of a query. */
	void showQueryResults(WaybackLogic wayback, HttpServletRequest request,
			HttpServletResponse response, WMRequest wmRequest,
			ResourceResults results) throws IOException, ServletException;

	/** Renders the results of a path query. */
	void showPathQueryResults(WaybackLogic wayback,
			HttpServletRequest request, HttpServletResponse response,
			WMRequest wmRequest, ResourceResults results) throws IOException,
			ServletException;

	/** Renders the page shown when a query has no matches. */
	void showNoMatches(WMRequest wmRequest, HttpServletRequest request,
			HttpServletResponse response) throws IOException, ServletException;

	/** Renders the page shown when the index is not available. */
	void showIndexNotAvailable(WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException;
}

--- NEW FILE: ResourceStore.java ---
package org.archive.wayback;

import java.io.IOException;
import java.util.Properties;

import org.archive.io.arc.ARCLocation;
import org.archive.wayback.core.Resource;

/** Source of archived Resources, addressed by ARC location. */
public interface ResourceStore {

	/**
	 * @param location ARC location of the wanted document
	 * @return the Resource for that location
	 * @throws IOException if the resource cannot be read
	 */
	Resource retrieveResource(ARCLocation location) throws IOException;

	/**
	 * Configures this store.
	 *
	 * @param p configuration properties
	 * @throws Exception if configuration fails
	 */
	void init(Properties p) throws Exception;
}

--- NEW FILE: RequestParser.java ---
package org.archive.wayback;

import javax.servlet.http.HttpServletRequest;

import org.archive.wayback.core.WMRequest;

/** Translates an incoming HTTP request into a WMRequest. */
public interface RequestParser {

	/**
	 * @param request incoming HTTP request
	 * @return the parsed request
	 */
	// NOTE(review): the SimpleQueryUI implementation returns null for requests
	// it does not recognise -- confirm that this is the intended contract.
	WMRequest parseRequest(HttpServletRequest request);
}

--- NEW FILE: ResourceIndex.java ---
package org.archive.wayback;

import java.io.IOException;
import java.util.Properties;

import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.WMRequest;
import org.archive.wayback.exception.WaybackException;

/** Index that answers WMRequests with matching ResourceResults. */
public interface ResourceIndex {

	/**
	 * @param request the query to run
	 * @return the results matching the request
	 */
	ResourceResults query(WMRequest request) throws IOException,
			WaybackException;

	/**
	 * Configures this index.
	 *
	 * @param p configuration properties
	 * @throws Exception if configuration fails
	 */
	void init(Properties p) throws Exception;
}

--- NEW FILE: ReplayUI.java ---
package org.archive.wayback;

import java.io.IOException;
import java.util.Properties;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.wayback.core.Resource;
import org.archive.wayback.core.ResourceResult;
import org.archive.wayback.core.ResourceResults;
import org.archive.wayback.core.WMRequest;
import org.archive.wayback.core.WaybackLogic;

/**
 * User interface for replay requests: returns archived documents to the
 * client and renders replay-related error pages.
 */
public interface ReplayUI {

	/**
	 * Configures this UI.
	 *
	 * @param p configuration properties
	 * @throws IOException declared for implementations that need it
	 */
	void init(Properties p) throws IOException;

	/**
	 * @param request the current servlet request
	 * @param result the result to link to
	 * @return URI under which the given result can be replayed
	 */
	String makeReplayURI(HttpServletRequest request, ResourceResult result);

	/**
	 * Handles a replay request.
	 *
	 * @param wayback access to the other Wayback components
	 * @param wmRequest parsed request
	 * @param request servlet request
	 * @param response servlet response
	 */
	void handle(WaybackLogic wayback, WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response)
			throws IOException, ServletException;

	/**
	 * Writes the given resource to the response.
	 *
	 * @param wmRequest parsed request
	 * @param result index entry for the resource
	 * @param resource the archived document
	 * @param request servlet request
	 * @param response servlet response
	 * @param results all index matches for the request
	 */
	void replayResource(WMRequest wmRequest, ResourceResult result,
			Resource resource, HttpServletRequest request,
			HttpServletResponse response, ResourceResults results)
			throws IOException, ServletException;

	/** Renders the page shown when the document is not in the archive. */
	void showNotInArchive(WMRequest wmRequest, HttpServletRequest request,
			HttpServletResponse response) throws IOException, ServletException;

	/** Renders the page shown when the resource is not available. */
	void showResourceNotAvailable(WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response,
			String message) throws IOException, ServletException;

	/** Renders the page shown when the index is not available. */
	void showIndexNotAvailable(WMRequest wmRequest,
			HttpServletRequest request, HttpServletResponse response,
			String message) throws IOException, ServletException;
}
|
From: Brad <bra...@us...> - 2005-10-18 02:31:10
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/webapp/jsp/QueryUI
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/webapp/jsp/QueryUI

Added Files:
	ErrorResult.jsp PathQueryResults.jsp QueryResults.jsp
Log Message:
Initial check-in -- pre code review

--- NEW FILE: PathQueryResults.jsp ---
<%-- Lists path-query results grouped by URL. Within a URL, a capture whose
     MD5 fragment differs from the previous capture is flagged "(new version)";
     otherwise it is shown as "unchanged". --%>
<%@ page import="java.util.Iterator" %>
<%@ page import="java.util.ArrayList" %>
<%@ page import="org.archive.wayback.core.ResourceResult" %>
<%@ page import="org.archive.wayback.core.Timestamp" %>
<%@ page import="org.archive.wayback.simplequeryui.UIResults" %>
<jsp:include page="../template/UI-header.txt" />
<%

UIResults results = (UIResults) request.getAttribute("ui-results");
String searchString = results.getSearchUrl();
int resultCount = results.getNumResults();

Timestamp searchStartTs = results.getStartTimestamp();
Timestamp searchEndTs = results.getEndTimestamp();
String prettySearchStart = searchStartTs.prettyDate();
String prettySearchEnd = searchEndTs.prettyDate();

Iterator itr = results.resultsIterator();
%>
<B><%= resultCount %></B> results for <B><%= searchString %></B><BR>
between <B><%= prettySearchStart %></B> and <B><%= prettySearchEnd %></B>
<HR>
<%
String lastUrl = null;
String lastMD5 = "";
while(itr.hasNext()) {
  ResourceResult result = (ResourceResult) itr.next();

  String url = result.getUrl();
  String prettyDate = result.getTimestamp().prettyDate();
  String origHost = result.getOrigHost();
  String MD5 = result.getMd5Fragment();
  String redirectFlag = result.isRedirect() ? "(redirect)" : "";
  String httpResponse = result.getHttpResponseCode();
  String mimeType = result.getMimeType();
  String replayUrl = results.resultToReplayUrl(result);

  // A new URL starts a new group and resets the version comparison.
  boolean newUrl = (lastUrl == null) || !lastUrl.equals(url);
  if(newUrl) {
    lastUrl = url;
    lastMD5 = "";
%>
<B><%= url %></B><BR>
<%
  }
  if(!MD5.equals(lastMD5)) {
    lastMD5 = MD5;
%>
<A HREF="<%= replayUrl %>"><%= prettyDate %></A>
<SPAN style="color:black;"><%= origHost %></SPAN>
<SPAN style="color:gray;"><%= httpResponse %></SPAN>
<SPAN style="color:brown;"><%= mimeType %></SPAN>
<%= redirectFlag %>
(new version)
<BR>
<%
  } else {
%>
<A HREF="<%= replayUrl %>"><%= prettyDate %></A>
<SPAN style="color:green;"><%= origHost %></SPAN>
<SPAN style="color:lightgray;">unchanged</SPAN>
<BR>
<%
  }
}
%>
<jsp:include page="../template/UI-footer.txt" />

--- NEW FILE: QueryResults.jsp ---
<%-- Lists the captures of a single URL. A capture whose MD5 fragment differs
     from the previous capture is flagged "(new version)". --%>
<%@ page import="java.util.Iterator" %>
<%@ page import="java.util.ArrayList" %>
<%@ page import="org.archive.wayback.core.ResourceResult" %>
<%@ page import="org.archive.wayback.core.Timestamp" %>
<%@ page import="org.archive.wayback.simplequeryui.UIResults" %>
<jsp:include page="../template/UI-header.txt" />
<%

UIResults results = (UIResults) request.getAttribute("ui-results");
String searchString = results.getSearchUrl();
int resultCount = results.getNumResults();

Timestamp searchStartTs = results.getStartTimestamp();
Timestamp searchEndTs = results.getEndTimestamp();
String prettySearchStart = searchStartTs.prettyDate();
String prettySearchEnd = searchEndTs.prettyDate();

Iterator itr = results.resultsIterator();
%>
<B><%= resultCount %></B> results for <B><%= searchString %></B><BR>
between <B><%= prettySearchStart %></B> and <B><%= prettySearchEnd %></B>
<HR>
<%
String lastMD5 = null;
while(itr.hasNext()) {
  ResourceResult result = (ResourceResult) itr.next();

  String prettyDate = result.getTimestamp().prettyDate();
  String origHost = result.getOrigHost();
  String MD5 = result.getMd5Fragment();
  String redirectFlag = result.isRedirect() ? "(redirect)" : "";
  String httpResponse = result.getHttpResponseCode();
  String mimeType = result.getMimeType();
  String replayUrl = results.resultToReplayUrl(result);

  // The first capture always counts as a new version.
  boolean updated = (lastMD5 == null) || !lastMD5.equals(MD5);
  lastMD5 = MD5;
  if(updated) {
%>
<A HREF="<%= replayUrl %>"><%= prettyDate %></A>
<SPAN style="color:black;"><%= origHost %></SPAN>
<SPAN style="color:gray;"><%= httpResponse %></SPAN>
<SPAN style="color:brown;"><%= mimeType %></SPAN>
<%= redirectFlag %>
(new version)
<BR>
<%
  } else {
%>
<A HREF="<%= replayUrl %>"><%= prettyDate %></A>
<SPAN style="color:green;"><%= origHost %></SPAN>
<BR>
<%
  }
}
%>
<jsp:include page="../template/UI-footer.txt" />

--- NEW FILE: ErrorResult.jsp ---
<%-- Shows the text stored in the "message" request attribute. A JSP
     expression must not end in a semicolon: its content is passed as an
     argument to out.print(). --%>
<jsp:include page="../template/UI-header.txt" />
<B><%= (String) request.getAttribute("message") %></B>
<jsp:include page="../template/UI-footer.txt" />
|