From: <bra...@us...> - 2007-07-16 23:34:06
|
Revision: 1795 http://archive-access.svn.sourceforge.net/archive-access/?rev=1795&view=rev Author: bradtofel Date: 2007-07-16 16:33:58 -0700 (Mon, 16 Jul 2007) Log Message: ----------- REFACTOR: most of the request parsing code was moved into this package, including some abstract classes for consolidating common code from implementation classes for various replay/query modes. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,154 @@ +/* BaseRequestParser + * + * $Id$ + * + * Created on 3:15:12 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. 
+ * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Map; +import java.util.Properties; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * Class that implements the RequestParser interface, and also understands how + * to: + * + * + * This class will attempt to use the overridable parseCustom() method to + * create the WaybackRequest object, but if that fails (returns null), it will + * fall back to: + + * A) attempting to parse out an incoming OpenSearch format query + * B) attempting to parse out any and all incoming form elements submitted as + * either GET or POST arguments + * + * This class also contains the functionality to extract HTTP header + * information into WaybackRequest objects, including Http auth info, referer, + * remote IPs, etc. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public abstract class BaseRequestParser implements RequestParser { + + protected final static String QUERY_BASE = "query"; + + protected final static String REPLAY_BASE = "replay"; + + protected final static int DEFAULT_MAX_RECORDS = 10; + + protected int maxRecords = DEFAULT_MAX_RECORDS; + + public void init(final Properties p) throws ConfigurationException { + PropertyConfiguration pc = new PropertyConfiguration(p); + maxRecords = pc.getInt(WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME, + DEFAULT_MAX_RECORDS); + } + + protected static String getMapParam(Map<String,String[]> queryMap, + String field) { + String arr[] = queryMap.get(field); + if (arr == null || arr.length == 0) { + return null; + } + return arr[0]; + } + + protected static String getRequiredMapParam(Map<String,String[]> queryMap, + String field) + throws BadQueryException { + String value = getMapParam(queryMap,field); + if(value == null) { + throw new BadQueryException("missing field " + field); + } + if(value.length() == 0) { + throw new BadQueryException("empty field " + field); + } + return value; + } + + protected static String getMapParamOrEmpty(Map<String,String[]> map, + String param) { + String val = getMapParam(map,param); + return (val == null) ? "" : val; + } + + + private void putUnlessNull(WaybackRequest request, String key, String val) { + if(val != null) { + request.put(key, val); + } + } + + protected void addHttpHeaderFields(WaybackRequest wbRequest, + HttpServletRequest httpRequest) { + + // attempt to get the HTTP referer if present.. 
+ putUnlessNull(wbRequest,WaybackConstants.REQUEST_REFERER_URL, + httpRequest.getHeader("REFERER")); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_REMOTE_ADDRESS, + httpRequest.getRemoteAddr()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_HOSTNAME, + httpRequest.getLocalName()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_PORT, + String.valueOf(httpRequest.getLocalPort())); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_WAYBACK_CONTEXT, + httpRequest.getContextPath()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_AUTH_TYPE, + httpRequest.getAuthType()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_REMOTE_USER, + httpRequest.getRemoteUser()); + putUnlessNull(wbRequest,WaybackConstants.REQUEST_LOCALE_LANG, + httpRequest.getLocale().getDisplayLanguage()); + + wbRequest.setLocale(httpRequest.getLocale()); + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) + */ + public abstract WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException; + + /** + * @return the maxRecords + */ + public int getMaxRecords() { + return maxRecords; + } + + /** + * @param maxRecords the maxRecords to set + */ + public void setMaxRecords(int maxRecords) { + this.maxRecords = maxRecords; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/CompositeRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,89 @@ +/* CompositeRequestParser + * + * $Id$ + * + * Created on 4:52:13 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. 
+ * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Properties; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.RequestParser; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.PropertyConfiguration; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class CompositeRequestParser extends BaseRequestParser { + private RequestParser[] parsers = null; + + public void init(final Properties p) throws ConfigurationException { + parsers = getRequestParsers(); + for(int i = 0; i < parsers.length; i++) { + parsers[i].init(p); + } + PropertyConfiguration pc = new PropertyConfiguration(p); + maxRecords = pc.getInt(WaybackConstants.RESULTS_PER_PAGE_CONFIG_NAME, + DEFAULT_MAX_RECORDS); + } + /** + * + */ + public void init() { + parsers = getRequestParsers(); + } + + + protected RequestParser[] getRequestParsers() { + RequestParser[] theParsers = { + new OpenSearchRequestParser(), + new FormRequestParser() + }; + return 
theParsers; + } + + /* (non-Javadoc) + * @see org.archive.wayback.RequestParser#parse(javax.servlet.http.HttpServletRequest) + */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + WaybackRequest wbRequest = null; + + for(int i = 0; i < parsers.length; i++) { + wbRequest = parsers[i].parse(httpRequest, wbContext); + if(wbRequest != null) { + break; + } + } + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,91 @@ +/* FormRequestParser + * + * $Id$ + * + * Created on 4:45:06 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FormRequestParser extends BaseRequestParser { + /** + * CGI argument name for Submit button... + */ + private final static String SUBMIT_BUTTON = "Submit"; + + /* + * Stuff whatever GET/POST arguments are sent up into the returned + * WaybackRequest object, except the Submit button argument. + */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) { + + WaybackRequest wbRequest = null; + @SuppressWarnings("unchecked") + Map<String,String[]> queryMap = httpRequest.getParameterMap(); + if(queryMap.size() > 0) { + wbRequest = new WaybackRequest(); + + String base = wbContext.translateRequestPath(httpRequest); + if(base.startsWith(REPLAY_BASE)) { + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + } else if(base.startsWith(QUERY_BASE)) { + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + } else { + return null; + } + + Set<String> keys = queryMap.keySet(); + Iterator<String> itr = keys.iterator(); + while(itr.hasNext()) { + String key = itr.next(); + if(key.equals(SUBMIT_BUTTON)) { + continue; + } + // just jam everything else in: + String val = getMapParam(queryMap,key); + wbRequest.put(key,val); + } + } + if(wbRequest != null) { + addHttpHeaderFields(wbRequest, httpRequest); + } + + return wbRequest; + } +} Added: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,147 @@ +/* OpenSearchRequestParser + * + * $Id$ + * + * Created on 4:47:03 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import java.util.Map; +import java.util.regex.Pattern; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class OpenSearchRequestParser extends BaseRequestParser { + + /** + * CGI argument name for query arguments + */ + public final static String SEARCH_QUERY = "q"; + + /** + * CGI argument name for number of results per page, 1 based + */ + public final static String SEARCH_RESULTS = "count"; + + /** + * CGI argument name for page number of results, 1 based + */ + public final static String START_PAGE = "start_page"; + + + // private final static String START_INDEX = "start_index"; + + private final static Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); + + // singles consume the next non-whitespace token following the term + // private String[] singleTokens = { "url", "site", "mimetype", "noredirect" }; + + // lines consume the entire rest of the query + private String[] lineTokens = { "terms" }; + + /* + * If the request includes a 'q' (query) argument, treat the request + * as an OpenSearch query, and extract all query terms, plus pagination + * info from the httpRequest object. 
+ */ + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + WaybackRequest wbRequest = null; + @SuppressWarnings("unchecked") + Map<String,String[]> queryMap = httpRequest.getParameterMap(); + String query = getMapParam(queryMap, SEARCH_QUERY); + if(query == null) { + return null; + } + wbRequest = new WaybackRequest(); + + String base = wbContext.translateRequestPath(httpRequest); + if(base.startsWith(REPLAY_BASE)) { + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_REPLAY_QUERY); + } else if(base.startsWith(QUERY_BASE)){ + wbRequest.put(WaybackConstants.REQUEST_TYPE, + WaybackConstants.REQUEST_URL_QUERY); + } else { + return null; + } + + String numResults = getMapParam(queryMap, SEARCH_RESULTS); + String startPage = getMapParam(queryMap, START_PAGE); + + if (numResults != null) { + int nr = Integer.parseInt(numResults); + wbRequest.setResultsPerPage(nr); + } else { + wbRequest.setResultsPerPage(maxRecords); + } + if (startPage != null) { + int sp = Integer.parseInt(startPage); + wbRequest.setPageNum(sp); + } else { + wbRequest.setPageNum(1); + } + + // first try the entire line_tokens: + for (int i = 0; i < lineTokens.length; i++) { + String token = lineTokens[i] + ":"; + int index = query.indexOf(token); + if (index > -1) { + // found it, take value as the remainder of the query + String value = query.substring(index + token.length()); + // TODO: trim trailing whitespace? 
+ wbRequest.put(lineTokens[i], value); + query = query.substring(0, index); + } + } + + // now split whatever is left on whitespace: + String[] parts = WHITESPACE_PATTERN.split(query); + for (int i = 0; i < parts.length; i++) { + String token = parts[i]; + int colonIndex = token.indexOf(":"); + if (colonIndex == -1) { + throw new BadQueryException("Bad search token(" + token + ")"); + } + String key = token.substring(0, colonIndex); + String value = token.substring(colonIndex + 1); + // TODO: make sure key is in singleTokens? + // let's just let em all thru for now: + wbRequest.put(key, value); + } + + addHttpHeaderFields(wbRequest, httpRequest); + + return wbRequest; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2007-07-16 23:33:58 UTC (rev 1795) @@ -0,0 +1,75 @@ +/* PathRequestParser + * + * $Id$ + * + * Created on 6:47:21 PM Apr 24, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.requestparser; + +import javax.servlet.http.HttpServletRequest; + +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.webapp.WaybackContext; + +/** + * Subclass of RequestParser that acquires key request information from the + * path component following the wayback context. + * + * @author brad + * @version $Date$, $Revision$ + */ +public abstract class PathRequestParser extends BaseRequestParser { + + /** + * @param requestPath + * @return WaybackRequest with information parsed from the requestPath, or + * null if information could not be extracted. + */ + public abstract WaybackRequest parse(String requestPath); + + /* (non-Javadoc) + * @see org.archive.wayback.requestparser.BaseRequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) + */ + @Override + public WaybackRequest parse(HttpServletRequest httpRequest, + WaybackContext wbContext) throws BadQueryException { + + String queryString = httpRequest.getQueryString(); + String origRequestPath = httpRequest.getRequestURI(); + + if (queryString != null) { + origRequestPath += "?" + queryString; + } + String contextPath = wbContext.getContextPath(httpRequest); + if (!origRequestPath.startsWith(contextPath)) { + return null; + } + String requestPath = origRequestPath.substring(contextPath.length()); + + WaybackRequest wbRequest = parse(requestPath); + if(wbRequest != null) { + addHttpHeaderFields(wbRequest, httpRequest); + } + + return wbRequest; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |