From: <bra...@us...> - 2007-07-16 22:22:17
|
Revision: 1767
http://archive-access.svn.sourceforge.net/archive-access/?rev=1767&view=rev
Author: bradtofel
Date: 2007-07-16 15:22:19 -0700 (Mon, 16 Jul 2007)

Log Message:
-----------
REFACTOR: new RequestParser classes for archival url

Added Paths:
-----------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java

Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767)
@@ -0,0 +1,75 @@
+/* PathDatePrefixRequestParser
+ *
+ * $Id$
+ *
+ * Created on 6:38:19 PM Apr 24, 2007.
+ *
+ * Copyright (C) 2007 Internet Archive.
+ *
+ * This file is part of wayback-core.
+ *
+ * wayback-core is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * wayback-core is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with wayback-core; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+package org.archive.wayback.archivalurl.requestparser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.httpclient.URIException;
+import org.archive.wayback.WaybackConstants;
+import org.archive.wayback.core.Timestamp;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.requestparser.PathRequestParser;
+
+/**
+ * RequestParser implementation that extracts request info from an Archival Url
+ * representing an exact url and a date prefix.
+ *
+ * @author brad
+ * @version $Date$, $Revision$
+ */
+public class PathDatePrefixQueryRequestParser extends PathRequestParser {
+    /**
+     * Regex which parses Archival URL queries into timestamp + url for an exact
+     * URL
+     */
+    private final static Pattern WB_QUERY_REGEX = Pattern
+            .compile("^(\\d{0,13})\\*/(.*[^*])$");
+
+    public WaybackRequest parse(String requestPath) {
+
+        WaybackRequest wbRequest = null;
+        Matcher matcher = WB_QUERY_REGEX.matcher(requestPath);
+        if (matcher != null && matcher.matches()) {
+
+            wbRequest = new WaybackRequest();
+            String dateStr = matcher.group(1);
+            String urlStr = matcher.group(2);
+
+            String startDate = Timestamp.parseBefore(dateStr).getDateStr();
+            String endDate = Timestamp.parseAfter(dateStr).getDateStr();
+            wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate);
+            wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);
+            wbRequest.put(WaybackConstants.REQUEST_TYPE,
+                    WaybackConstants.REQUEST_URL_QUERY);
+            try {
+                wbRequest.setRequestUrl(urlStr);
+            } catch (URIException e) {
+                wbRequest = null;
+            }
+        }
+        return wbRequest;
+    }
+}

Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767)
@@ -0,0 +1,78 @@
+/* PathDateRangeQueryRequestParser
+ *
+ * $Id$
+ *
+ * Created on 6:41:58 PM Apr 24, 2007.
+ *
+ * Copyright (C) 2007 Internet Archive.
+ *
+ * This file is part of wayback-core.
+ *
+ * wayback-core is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * wayback-core is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with wayback-core; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+package org.archive.wayback.archivalurl.requestparser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.httpclient.URIException;
+import org.archive.wayback.WaybackConstants;
+import org.archive.wayback.core.Timestamp;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.requestparser.PathRequestParser;
+
+/**
+ * RequestParser implementation that extracts request info from an Archival Url
+ * representing an exact url and a date range.
+ *
+ * @author brad
+ * @version $Date$, $Revision$
+ */
+public class PathDateRangeQueryRequestParser extends PathRequestParser {
+
+    /**
+     * Regex which parses Archival URL queries into Start Timestamp +
+     * End Timestamp + URL for an exact URL
+     */
+    private final static Pattern WB_QUERY2_REGEX = Pattern
+            .compile("^(\\d{1,14})-(\\d{1,14})\\*/(.*[^*])$");
+
+
+    public WaybackRequest parse(String requestPath) {
+        WaybackRequest wbRequest = null;
+        Matcher matcher = WB_QUERY2_REGEX.matcher(requestPath);
+        if (matcher != null && matcher.matches()) {
+
+            wbRequest = new WaybackRequest();
+            String startDateStr = matcher.group(1);
+            String endDateStr = matcher.group(2);
+            String urlStr = matcher.group(3);
+
+            String startDate = Timestamp.parseBefore(startDateStr).getDateStr();
+            String endDate = Timestamp.parseAfter(endDateStr).getDateStr();
+            wbRequest.put(WaybackConstants.REQUEST_START_DATE,startDate);
+            wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);
+            wbRequest.put(WaybackConstants.REQUEST_TYPE,
+                    WaybackConstants.REQUEST_URL_QUERY);
+            try {
+                wbRequest.setRequestUrl(urlStr);
+            } catch (URIException e) {
+                wbRequest = null;
+            }
+        }
+        return wbRequest;
+    }
+
+}

Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767)
@@ -0,0 +1,75 @@
+/* PathPrefixDatePrefixQueryRequestParser
+ *
+ * $Id$
+ *
+ * Created on 6:42:18 PM Apr 24, 2007.
+ *
+ * Copyright (C) 2007 Internet Archive.
+ *
+ * This file is part of wayback-core.
+ *
+ * wayback-core is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * wayback-core is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with wayback-core; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+package org.archive.wayback.archivalurl.requestparser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.httpclient.URIException;
+import org.archive.wayback.WaybackConstants;
+import org.archive.wayback.core.Timestamp;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.requestparser.PathRequestParser;
+
+/**
+ * RequestParser implementation that extracts request info from an Archival Url
+ * representing an url prefix and a date prefix.
+ *
+ * @author brad
+ * @version $Date$, $Revision$
+ */
+public class PathPrefixDatePrefixQueryRequestParser extends PathRequestParser {
+    /**
+     * Regex which parses Archival URL queries into timestamp + URL for URLs
+     * beginning with the URL prefix
+     */
+    private final static Pattern WB_PATH_QUERY_REGEX = Pattern
+            .compile("^(\\d{0,13})\\*/(.*)\\*$");
+
+    public WaybackRequest parse(String requestPath) {
+        WaybackRequest wbRequest = null;
+        Matcher matcher = WB_PATH_QUERY_REGEX.matcher(requestPath);
+        if (matcher != null && matcher.matches()) {
+
+            wbRequest = new WaybackRequest();
+            String dateStr = matcher.group(1);
+            String urlStr = matcher.group(2);
+            String startDate = Timestamp.parseBefore(dateStr).getDateStr();
+            String endDate = Timestamp.parseAfter(dateStr).getDateStr();
+            wbRequest.put(WaybackConstants.REQUEST_START_DATE,
+                    startDate);
+            wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);
+
+            wbRequest.put(WaybackConstants.REQUEST_TYPE,
+                    WaybackConstants.REQUEST_URL_PREFIX_QUERY);
+            try {
+                wbRequest.setRequestUrl(urlStr);
+            } catch (URIException e) {
+                wbRequest = null;
+            }
+        }
+        return wbRequest;
+    }
+}

Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767)
@@ -0,0 +1,76 @@
+/* PathPrefixDateRangeQueryRequestParser
+ *
+ * $Id$
+ *
+ * Created on 6:42:38 PM Apr 24, 2007.
+ *
+ * Copyright (C) 2007 Internet Archive.
+ *
+ * This file is part of wayback-core.
+ *
+ * wayback-core is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * wayback-core is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with wayback-core; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+package org.archive.wayback.archivalurl.requestparser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.httpclient.URIException;
+import org.archive.wayback.WaybackConstants;
+import org.archive.wayback.core.Timestamp;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.requestparser.PathRequestParser;
+
+/**
+ * RequestParser implementation that extracts request info from an Archival Url
+ * representing an url prefix and a date range.
+ *
+ * @author brad
+ * @version $Date$, $Revision$
+ */
+public class PathPrefixDateRangeQueryRequestParser extends PathRequestParser {
+    /**
+     * Regex which parses Archival URL queries into Start Timestamp +
+     * End Timestamp + URL for URLs beginning with the URL prefix
+     */
+    private final static Pattern WB_PATH_QUERY2_REGEX = Pattern
+            .compile("^(\\d{1,14})-(\\d{1,14})\\*/(.*)\\*$");
+
+    public WaybackRequest parse(String requestPath) {
+        WaybackRequest wbRequest = null;
+        Matcher matcher = WB_PATH_QUERY2_REGEX.matcher(requestPath);
+        if (matcher != null && matcher.matches()) {
+
+            wbRequest = new WaybackRequest();
+            String startDateStr = matcher.group(1);
+            String endDateStr = matcher.group(2);
+            String urlStr = matcher.group(3);
+            String startDate = Timestamp.parseBefore(startDateStr).getDateStr();
+            String endDate = Timestamp.parseAfter(endDateStr).getDateStr();
+            wbRequest.put(WaybackConstants.REQUEST_START_DATE,
+                    startDate);
+            wbRequest.put(WaybackConstants.REQUEST_END_DATE,endDate);
+
+            wbRequest.put(WaybackConstants.REQUEST_TYPE,
+                    WaybackConstants.REQUEST_URL_PREFIX_QUERY);
+            try {
+                wbRequest.setRequestUrl(urlStr);
+            } catch (URIException e) {
+                wbRequest = null;
+            }
+        }
+        return wbRequest;
+    }
+}

Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2007-07-16 22:22:19 UTC (rev 1767)
@@ -0,0 +1,110 @@
+/* ReplayRequestParser
+ *
+ * $Id$
+ *
+ * Created on 6:39:51 PM Apr 24, 2007.
+ *
+ * Copyright (C) 2007 Internet Archive.
+ *
+ * This file is part of wayback-core.
+ *
+ * wayback-core is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * wayback-core is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with wayback-core; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+package org.archive.wayback.archivalurl.requestparser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+import org.apache.commons.httpclient.URIException;
+import org.archive.wayback.WaybackConstants;
+import org.archive.wayback.core.Timestamp;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.requestparser.PathRequestParser;
+
+/**
+ * RequestParser implementation that extracts request info from a Replay
+ * Archival Url path.
+ *
+ * @author brad
+ * @version $Date$, $Revision$
+ */
+public class ReplayRequestParser extends PathRequestParser {
+    /**
+     * Regex which parses Archival URL replay requests into timestamp + url
+     */
+    private final Pattern WB_REQUEST_REGEX = Pattern
+            .compile("^(\\d{1,14})/(.*)$");
+
+    public WaybackRequest parse(String requestPath) {
+        WaybackRequest wbRequest = null;
+        Matcher matcher = WB_REQUEST_REGEX.matcher(requestPath);
+        if (matcher != null && matcher.matches()) {
+            wbRequest = new WaybackRequest();
+            String dateStr = matcher.group(1);
+            String urlStr = matcher.group(2);
+            if (!urlStr.startsWith("http://")) {
+                urlStr = "http://" + urlStr;
+            }
+
+            // The logic of the classic WM wrt timestamp bounding:
+            // if 14-digits are specified, assume min-max range boundaries
+            // if less than 14 are specified, assume min-max range boundaries
+            // based upon amount given (2001 => 20010101... - 20011231...)
+            // AND assume the user asked for the LATEST possible date
+            // within that range...
+            //
+            // ...don't ask me, I just work here.
+
+            String startDate = null;
+            String endDate = null;
+            if (dateStr.length() == 14) {
+                startDate = Timestamp.earliestTimestamp().getDateStr();
+                endDate = Timestamp.currentTimestamp().getDateStr();
+            } else {
+
+                // classic behavior:
+                // startDate = Timestamp.parseBefore(dateStr).getDateStr();
+                // endDate = Timestamp.parseAfter(dateStr).getDateStr();
+                // dateStr = endDate;
+
+                // "better" behavior:
+                startDate = Timestamp.earliestTimestamp().getDateStr();
+                endDate = Timestamp.currentTimestamp().getDateStr();
+                dateStr = Timestamp.parseAfter(dateStr).getDateStr();
+
+            }
+            wbRequest.put(WaybackConstants.REQUEST_EXACT_DATE, dateStr);
+            wbRequest.put(WaybackConstants.REQUEST_START_DATE, startDate);
+            wbRequest.put(WaybackConstants.REQUEST_END_DATE, endDate);
+
+            wbRequest.put(WaybackConstants.REQUEST_TYPE,
+                    WaybackConstants.REQUEST_REPLAY_QUERY);
+
+            try {
+//                String wbPrefix = wbRequest.getDefaultWaybackPrefix();
+//                if (urlStr.startsWith(wbPrefix)) {
+//                    wbRequest.setBetterRequestURI(urlStr);
+//                }
+                wbRequest.setRequestUrl(urlStr);
+            } catch (URIException e) {
+                e.printStackTrace();
+                wbRequest = null;
+            }
+        }
+        return wbRequest;
+    }
+
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |