You can subscribe to this list here.
| 2005 | Jan | Feb | Mar | Apr | May | Jun | Jul (1) | Aug (10) | Sep (36) | Oct (339) | Nov (103) | Dec (152) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2006 | Jan (141) | Feb (102) | Mar (125) | Apr (203) | May (57) | Jun (30) | Jul (139) | Aug (46) | Sep (64) | Oct (105) | Nov (34) | Dec (162) |
| 2007 | Jan (81) | Feb (57) | Mar (141) | Apr (72) | May (9) | Jun (1) | Jul (144) | Aug (88) | Sep (40) | Oct (43) | Nov (34) | Dec (20) |
| 2008 | Jan (44) | Feb (45) | Mar (16) | Apr (36) | May (8) | Jun (77) | Jul (177) | Aug (66) | Sep (8) | Oct (33) | Nov (13) | Dec (37) |
| 2009 | Jan (2) | Feb (5) | Mar (8) | Apr | May (36) | Jun (19) | Jul (46) | Aug (8) | Sep (1) | Oct (66) | Nov (61) | Dec (10) |
| 2010 | Jan (13) | Feb (16) | Mar (38) | Apr (76) | May (47) | Jun (32) | Jul (35) | Aug (45) | Sep (20) | Oct (61) | Nov (24) | Dec (16) |
| 2011 | Jan (22) | Feb (34) | Mar (11) | Apr (8) | May (24) | Jun (23) | Jul (11) | Aug (42) | Sep (81) | Oct (48) | Nov (21) | Dec (20) |
| 2012 | Jan (30) | Feb (25) | Mar (4) | Apr (6) | May (1) | Jun (5) | Jul (5) | Aug (8) | Sep (6) | Oct (6) | Nov | Dec |
|
From: <bra...@us...> - 2010-05-18 22:54:16
|
Revision: 3105
http://archive-access.svn.sourceforge.net/archive-access/?rev=3105&view=rev
Author: bradtofel
Date: 2010-05-18 22:54:10 +0000 (Tue, 18 May 2010)
Log Message:
-----------
INITIAL REV:
Added Paths:
-----------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java 2010-05-18 22:54:10 UTC (rev 3105)
@@ -0,0 +1,79 @@
+/* JSPReplayRenderer
+ *
+ * $Id$:
+ *
+ * Created on May 7, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.replay;
+
+import java.io.IOException;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.archive.wayback.ReplayRenderer;
+import org.archive.wayback.ResultURIConverter;
+import org.archive.wayback.core.CaptureSearchResult;
+import org.archive.wayback.core.CaptureSearchResults;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.core.UIResults;
+import org.archive.wayback.core.WaybackRequest;
+import org.archive.wayback.exception.WaybackException;
+
+/**
+ * ReplayRenderer implementation which just forwards responsibility for
+ * rendering a resource to a .jsp file.
+ *
+ * @author brad
+ *
+ */
+public class JSPReplayRenderer implements ReplayRenderer {
+ private String targetJsp = null;
+
+ public void renderResource(HttpServletRequest httpRequest,
+ HttpServletResponse httpResponse, WaybackRequest wbRequest,
+ CaptureSearchResult result, Resource resource,
+ ResultURIConverter uriConverter, CaptureSearchResults results)
+ throws ServletException, IOException, WaybackException {
+ UIResults uiResults =
+ new UIResults(wbRequest, uriConverter, results, result, resource);
+ uiResults.forward(httpRequest, httpResponse, targetJsp);
+ }
+
+ /**
+ * @return the context-relative path to the .jsp responsible for rendering
+ * the resource
+ */
+ public String getTargetJsp() {
+ return targetJsp;
+ }
+
+ /**
+ * @param targetJsp the context-relative path to the .jsp responsible for
+ * rendering the resource
+ */
+ public void setTargetJsp(String targetJsp) {
+ this.targetJsp = targetJsp;
+ }
+
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-18 22:46:13
|
Revision: 3104
http://archive-access.svn.sourceforge.net/archive-access/?rev=3104&view=rev
Author: bradtofel
Date: 2010-05-18 22:46:07 +0000 (Tue, 18 May 2010)
Log Message:
-----------
Translate escaped characters within resolved urls prior to contextualizing.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-18 22:44:22 UTC (rev 3103)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-18 22:46:07 UTC (rev 3104)
@@ -31,6 +31,7 @@
import org.apache.commons.httpclient.URIException;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
+import org.htmlparser.util.Translate;
/**
* Class which tracks the context and state involved with parsing an HTML
@@ -93,6 +94,7 @@
* @throws URISyntaxException if the input URL is malformed
*/
public String resolve(String url) throws URISyntaxException {
+ url = Translate.decode(url);
int hashIdx = url.indexOf('#');
String frag = "";
if(hashIdx != -1) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-18 22:44:29
|
Revision: 3103
http://archive-access.svn.sourceforge.net/archive-access/?rev=3103&view=rev
Author: bradtofel
Date: 2010-05-18 22:44:22 +0000 (Tue, 18 May 2010)
Log Message:
-----------
Experimental: changes to make a SearchResultSource that can be directly indexed by ordinal position.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java
Added Paths:
-----------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -0,0 +1,40 @@
+/* SequencedSearchResultSource
+ *
+ * $Id$:
+ *
+ * Created on May 14, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.resourceindex;
+
+import org.archive.wayback.core.CaptureSearchResult;
+import org.archive.wayback.exception.ResourceIndexNotAvailableException;
+import org.archive.wayback.util.CloseableIterator;
+
+/**
+ * @author brad
+ *
+ */
+public interface SequencedSearchResultSource extends SearchResultSource {
+ public CloseableIterator<CaptureSearchResult>
+ getPrefixIterator(final String prefix, int startIdx)
+ throws ResourceIndexNotAvailableException;
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -0,0 +1,63 @@
+/* SkippingStringPrefixIterator
+ *
+ * $Id$:
+ *
+ * Created on May 14, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.resourceindex.ziplines;
+
+import java.util.Iterator;
+
+/**
+ * @author brad
+ *
+ */
+public class SkippingStringPrefixIterator extends StringPrefixIterator {
+ private long skipCount = 0;
+ private long totalMatches = -1;
+
+ public SkippingStringPrefixIterator(Iterator<String> inner, String prefix,
+ long skipCount) {
+ super(inner,prefix);
+ this.skipCount = skipCount;
+ }
+ public SkippingStringPrefixIterator(Iterator<String> inner, String prefix) {
+ super(inner,prefix);
+ }
+ public long getTotalMatches() {
+ return totalMatches;
+ }
+ public void setTotalMatches(long totalMatches) {
+ this.totalMatches = totalMatches;
+ }
+ public boolean hasNext() {
+ while(skipCount > 0) {
+ if(super.hasNext()) {
+ next();
+ skipCount--;
+ } else {
+ return false;
+ }
+ }
+ return super.hasNext();
+ }
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java 2010-05-18 22:38:59 UTC (rev 3102)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -47,6 +47,9 @@
truncated = ((ZiplinesChunkIterator)inner).isTruncated();
}
}
+ public long getTotalMatches() {
+ return 0 ;
+ }
public boolean isTruncated() {
return truncated;
}
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -0,0 +1,141 @@
+/* ZiplineBlockMatches
+ *
+ * $Id$:
+ *
+ * Created on May 14, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.resourceindex.ziplines;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * @author brad
+ *
+ */
+public class ZiplineBlockMatches {
+ private ArrayList<ZiplinedBlock> blocks = null;
+ private String prefix = null;
+ private int cachedFirstCount = -1;
+ private int cachedLastCount = -1;
+ public ZiplineBlockMatches(ArrayList<ZiplinedBlock> blocks, String prefix) {
+ this.blocks = blocks;
+ this.prefix = prefix;
+ cachedFirstCount = -1;
+ cachedLastCount = -1;
+ }
+
+ public StringPrefixIterator getIterator() {
+ ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks);
+ zci.setTruncated(false);
+ return new StringPrefixIterator(zci,prefix);
+ }
+
+ public StringPrefixIterator getIteratorAt(long skip) throws IOException {
+ SkippingStringPrefixIterator itr = null;
+ ArrayList<ZiplinedBlock> matchingBlocked =
+ new ArrayList<ZiplinedBlock>();
+ long total = getTotalMatching();
+ if(skip > total) {
+ // TODO: should return empty itr...
+ return null;
+ }
+ long firstBlockMatches =
+ countMatchesInStartBlock(blocks.get(0), prefix);
+ if(skip < firstBlockMatches) {
+ ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks);
+ itr = new SkippingStringPrefixIterator(zci,prefix,skip);
+ itr.setTotalMatches(total);
+ return itr;
+ }
+ skip -= firstBlockMatches;
+ int size = blocks.size();
+ for(int i = 1; i < size; i++) {
+ ZiplinedBlock block = blocks.get(i);
+ if(block.count > skip) {
+ // this is the block to start:
+ ZiplinesChunkIterator zci =
+ new ZiplinesChunkIterator(blocks.subList(i, size));
+ itr = new SkippingStringPrefixIterator(zci,prefix,skip);
+ itr.setTotalMatches(total);
+ return itr;
+ }
+ skip -= block.count;
+ }
+ // should never get here...
+ return null;
+ }
+
+ public long getTotalMatching() throws IOException {
+ if(blocks == null) {
+ return 0;
+ }
+ int size = blocks.size();
+ if(size == 0) {
+ return 0;
+ }
+ long count = countMatchesInStartBlock(blocks.get(0),prefix);
+ if(size == 1) {
+ return count;
+ }
+ for(int i = 1; i < size-1; i++) {
+ count += blocks.get(i).count;
+ }
+ count += countMatchesInLastBlock(blocks.get(size-1), prefix);
+ return count;
+ }
+ private long countMatchesInStartBlock(ZiplinedBlock block, String prefix)
+ throws IOException {
+ if(cachedFirstCount == -1) {
+ BufferedReader r = block.readBlock();
+ int matches = block.count;
+ while(true) {
+ String nextLine = r.readLine();
+ if((nextLine == null) || nextLine.startsWith(prefix)) {
+ r.close();
+ cachedFirstCount = matches;
+ break;
+ }
+ matches--;
+ }
+ }
+ return cachedFirstCount;
+ }
+ private long countMatchesInLastBlock(ZiplinedBlock block, String prefix)
+ throws IOException {
+ if(cachedLastCount == -1) {
+ BufferedReader r = block.readBlock();
+ int matches = 0;
+ while(true) {
+ String nextLine = r.readLine();
+ if((nextLine == null) || !nextLine.startsWith(prefix)) {
+ r.close();
+ cachedLastCount = matches;
+ break;
+ }
+ matches++;
+ }
+ }
+ return cachedLastCount;
+ }
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-05-18 22:38:59 UTC (rev 3102)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -44,6 +44,7 @@
String urlOrPath = null;
long offset = -1;
+ int count = 0;
public final static int BLOCK_SIZE = 128 * 1024;
private final static String RANGE_HEADER = "Range";
private final static String BYTES_HEADER = "bytes=";
@@ -53,8 +54,17 @@
* @param offset start of 128K block boundary.
*/
public ZiplinedBlock(String urlOrPath, long offset) {
+ this(urlOrPath,offset,0);
+ }
+ /**
+ * @param urlOrPath URL where this file can be downloaded
+ * @param offset start of 128K block boundary.
+ * @param count number of records in this block
+ */
+ public ZiplinedBlock(String urlOrPath, long offset, int count) {
this.urlOrPath = urlOrPath;
this.offset = offset;
+ this.count = count;
}
/**
* @return a BufferedReader of the underlying compressed data in this block
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -0,0 +1,107 @@
+/* ZiplinedBlockStringSequence
+ *
+ * $Id$:
+ *
+ * Created on May 14, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.resourceindex.ziplines;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.archive.wayback.exception.ResourceIndexNotAvailableException;
+import org.archive.wayback.util.CloseableIterator;
+import org.archive.wayback.util.flatfile.FlatFile;
+
+/**
+ * @author brad
+ *
+ */
+public class ZiplinedBlockStringSequence {
+ private FlatFile chunkIndex = null;
+ private HashMap<String,String> chunkMap = null;
+ private int maxBlocks = 10000;
+
+ public ZiplinedBlockStringSequence(FlatFile chunkIndex,
+ HashMap<String,String> chunkMap) {
+ this.chunkIndex = chunkIndex;
+ this.chunkMap = chunkMap;
+ }
+
+ private ZiplineBlockMatches getBlockMatches(String prefix)
+ throws IOException, ResourceIndexNotAvailableException {
+ ArrayList<ZiplinedBlock> blocks = new ArrayList<ZiplinedBlock>();
+ boolean first = true;
+ int numBlocks = 0;
+ boolean truncated = false;
+ CloseableIterator<String> itr = null;
+ try {
+ itr = chunkIndex.getRecordIteratorLT(prefix);
+ while(itr.hasNext()) {
+ if(numBlocks >= maxBlocks) {
+ truncated = true;
+ break;
+ }
+ String blockDescriptor = itr.next();
+ numBlocks++;
+ String parts[] = blockDescriptor.split("\t");
+ if(parts.length != 4) {
+ throw new ResourceIndexNotAvailableException("Bad line(" +
+ blockDescriptor + ")");
+ }
+ // only compare the correct length:
+ String prefCmp = prefix;
+ String blockCmp = parts[0];
+ if(first) {
+ // always add first:
+ first = false;
+ } else if(!blockCmp.startsWith(prefCmp)) {
+ // all done;
+ break;
+ }
+ // add this and keep lookin...
+ String url = chunkMap.get(parts[1]);
+ long offset = Long.parseLong(parts[2]);
+ int count = Integer.parseInt(parts[3]);
+
+ blocks.add(new ZiplinedBlock(url, offset, count));
+ }
+ } finally {
+ if(itr != null) {
+ itr.close();
+ }
+ }
+ return new ZiplineBlockMatches(blocks,prefix);
+ }
+
+ public StringPrefixIterator getIterator(String prefix, long skip)
+ throws ResourceIndexNotAvailableException, IOException {
+ ZiplineBlockMatches matches = getBlockMatches(prefix);
+ return matches.getIteratorAt(skip);
+ }
+ public StringPrefixIterator getIterator(String prefix)
+ throws ResourceIndexNotAvailableException, IOException {
+ ZiplineBlockMatches matches = getBlockMatches(prefix);
+ return matches.getIterator();
+ }
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -0,0 +1,85 @@
+/* ZiplinedBlockStringSequenceTest
+ *
+ * $Id$:
+ *
+ * Created on May 14, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.resourceindex.ziplines;
+
+import java.io.IOException;
+import java.util.HashMap;
+
+import org.archive.wayback.exception.ResourceIndexNotAvailableException;
+import org.archive.wayback.util.CloseableIterator;
+import org.archive.wayback.util.flatfile.FlatFile;
+
+import junit.framework.TestCase;
+
+/**
+ * @author brad
+ *
+ */
+public class ZiplinedBlockStringSequenceTest extends TestCase {
+ private String indexPath = "/home/brad/os-cdx/CDX-201002-clean/ALL.count.summary";
+ private String mapPath = "/home/brad/os-cdx/CDX-201002-clean/ALL.loc-workstation";
+
+ private ZiplinedBlockStringSequence getSequence() throws IOException {
+ HashMap<String, String> chunkMap = new HashMap<String, String>();
+ FlatFile ff = new FlatFile(mapPath);
+ CloseableIterator<String> lines = ff.getSequentialIterator();
+ while(lines.hasNext()) {
+ String line = lines.next();
+ String[] parts = line.split("\\s");
+ if(parts.length != 2) {
+ throw new IOException("Bad line(" + line +") in (" +
+ mapPath + ")");
+ }
+ chunkMap.put(parts[0],parts[1]);
+ }
+ lines.close();
+ FlatFile chunkIndex = new FlatFile(indexPath);
+ return new ZiplinedBlockStringSequence(chunkIndex, chunkMap);
+ }
+ /**
+ * Test method for {@link org.archive.wayback.resourceindex.ziplines.ZiplinedBlockStringSequence#getIterator(java.lang.String, long)}.
+ * @throws IOException
+ * @throws ResourceIndexNotAvailableException
+ */
+ public void testGetIteratorStringLong() throws IOException, ResourceIndexNotAvailableException {
+ ZiplinedBlockStringSequence seq = getSequence();
+ StringPrefixIterator itr = seq.getIterator("yahoo.com/", 1000000);
+ System.out.format("Total Matches %d\n",itr.getTotalMatches());
+ for(int i = 0; i < 10; i++) {
+ if(itr.hasNext()) {
+ System.out.format("Line(%d): %s\n",i,itr.next());
+ }
+ }
+ }
+
+ /**
+ * Test method for {@link org.archive.wayback.resourceindex.ziplines.ZiplinedBlockStringSequence#getIterator(java.lang.String)}.
+ */
+ public void testGetIteratorString() {
+// fail("Not yet implemented");
+ }
+
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-05-18 22:38:59 UTC (rev 3102)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-05-18 22:44:22 UTC (rev 3103)
@@ -39,6 +39,7 @@
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.exception.ResourceIndexNotAvailableException;
import org.archive.wayback.resourceindex.SearchResultSource;
+import org.archive.wayback.resourceindex.SequencedSearchResultSource;
import org.archive.wayback.resourceindex.cdx.CDXFormatToSearchResultAdapter;
import org.archive.wayback.resourceindex.cdx.format.CDXFormat;
import org.archive.wayback.resourceindex.cdx.format.CDXFormatException;
@@ -132,10 +133,9 @@
throw new ResourceIndexNotAvailableException(e.getMessage());
}
}
-
- public Iterator<String> getStringPrefixIterator(String prefix)
- throws ResourceIndexNotAvailableException, IOException {
+ private ArrayList<ZiplinedBlock> getBlockListForPrefix(String prefix)
+ throws IOException, ResourceIndexNotAvailableException {
ArrayList<ZiplinedBlock> blocks = new ArrayList<ZiplinedBlock>();
boolean first = true;
int numBlocks = 0;
@@ -175,8 +175,15 @@
itr.close();
}
}
+ return blocks;
+ }
+
+ public Iterator<String> getStringPrefixIterator(String prefix)
+ throws ResourceIndexNotAvailableException, IOException {
+
+ ArrayList<ZiplinedBlock> blocks = getBlockListForPrefix(prefix);
ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks);
- zci.setTruncated(truncated);
+ zci.setTruncated(false);
return new StringPrefixIterator(zci,prefix);
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-18 22:39:06
|
Revision: 3102
http://archive-access.svn.sourceforge.net/archive-access/?rev=3102&view=rev
Author: bradtofel
Date: 2010-05-18 22:38:59 +0000 (Tue, 18 May 2010)
Log Message:
-----------
INTERFACE: made guessed charset HTTP header name public
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-05-17 19:52:24 UTC (rev 3101)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-05-18 22:38:59 UTC (rev 3102)
@@ -50,7 +50,7 @@
*/
public abstract class TextReplayRenderer implements ReplayRenderer {
- private static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset";
+ public static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset";
private String guessedCharsetHeader = GUESSED_CHARSET_HEADER;
private List<String> jspInserts = null;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3101
http://archive-access.svn.sourceforge.net/archive-access/?rev=3101&view=rev
Author: bradtofel
Date: 2010-05-17 19:52:24 +0000 (Mon, 17 May 2010)
Log Message:
-----------
FEATURE: exposed addDefaults() to internal ProxyReplayRequestParser, to enable/disable the possibly slow getLocalHostnames lookup
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java 2010-05-17 19:50:11 UTC (rev 3100)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java 2010-05-17 19:52:24 UTC (rev 3101)
@@ -105,4 +105,17 @@
}
return wbRequest;
}
+ /**
+ * @return the addDefaults
+ */
+ public boolean isAddDefaults() {
+ return prrp.isAddDefaults();
+ }
+
+ /**
+ * @param addDefaults the addDefaults to set
+ */
+ public void setAddDefaults(boolean addDefaults) {
+ prrp.setAddDefaults(addDefaults);
+ }
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3100
http://archive-access.svn.sourceforge.net/archive-access/?rev=3100&view=rev
Author: bradtofel
Date: 2010-05-17 19:50:11 +0000 (Mon, 17 May 2010)
Log Message:
-----------
INTERFACE: made makeFlagDateSpec() public, added javadoc, and also made identity flag get forwarded.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java 2010-05-17 19:48:36 UTC (rev 3099)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java 2010-05-17 19:50:11 UTC (rev 3100)
@@ -63,9 +63,19 @@
String betterURI = uriConverter.makeReplayURI(captureDate,url);
httpResponse.sendRedirect(betterURI);
}
- private String makeFlagDateSpec(String dateSpec, WaybackRequest request) {
+
+ /**
+ * Given a date, and a WaybackRequest object, create a new datespec + flags
+ * which represent the same options as requested by the WaybackRequest
+ * @param timestamp the 14-digit timestamp to use
+ * @param request the WaybackRequest from which to get extra request option
+ * flags
+ * @return a String representing the flags on the WaybackRequest for the
+ * specified date
+ */
+ public static String makeFlagDateSpec(String timestamp, WaybackRequest request) {
StringBuilder sb = new StringBuilder();
- sb.append(dateSpec);
+ sb.append(timestamp);
if(request.isCSSContext()) {
sb.append(ArchivalUrlRequestParser.CSS_CONTEXT);
sb.append(ArchivalUrlRequestParser.FLAG_DELIM);
@@ -78,6 +88,10 @@
sb.append(ArchivalUrlRequestParser.IMG_CONTEXT);
sb.append(ArchivalUrlRequestParser.FLAG_DELIM);
}
+ if(request.isIdentityContext()) {
+ sb.append(ArchivalUrlRequestParser.IDENTITY_CONTEXT);
+ sb.append(ArchivalUrlRequestParser.FLAG_DELIM);
+ }
return sb.toString();
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3099
http://archive-access.svn.sourceforge.net/archive-access/?rev=3099&view=rev
Author: bradtofel
Date: 2010-05-17 19:48:36 +0000 (Mon, 17 May 2010)
Log Message:
-----------
FEATURE: now strips default ports from urls when rewriting as replay URLs.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java 2010-05-17 19:45:41 UTC (rev 3098)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java 2010-05-17 19:48:36 UTC (rev 3099)
@@ -25,6 +25,7 @@
package org.archive.wayback.archivalurl;
import org.archive.wayback.ResultURIConverter;
+import org.archive.wayback.util.url.UrlOperations;
/**
*
@@ -42,15 +43,24 @@
* @see org.archive.wayback.ResultURIConverter#makeReplayURI(java.lang.String, java.lang.String)
*/
public String makeReplayURI(String datespec, String url) {
- String suffix = datespec + "/" + url;
+ StringBuilder sb = null;
+
if(replayURIPrefix == null) {
- return suffix;
- } else {
- if(url.startsWith(replayURIPrefix)) {
- return url;
- }
- return replayURIPrefix + suffix;
+ sb = new StringBuilder(url.length() + datespec.length());
+ sb.append(datespec);
+ sb.append("/");
+ sb.append(UrlOperations.stripDefaultPortFromUrl(url));
+ return sb.toString();
}
+ if(url.startsWith(replayURIPrefix)) {
+ return url;
+ }
+ sb = new StringBuilder(url.length() + datespec.length());
+ sb.append(replayURIPrefix);
+ sb.append(datespec);
+ sb.append("/");
+ sb.append(UrlOperations.stripDefaultPortFromUrl(url));
+ return sb.toString();
}
/**
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3098
http://archive-access.svn.sourceforge.net/archive-access/?rev=3098&view=rev
Author: bradtofel
Date: 2010-05-17 19:45:41 +0000 (Mon, 17 May 2010)
Log Message:
-----------
LOGGING
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2010-05-17 19:44:24 UTC (rev 3097)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2010-05-17 19:45:41 UTC (rev 3098)
@@ -116,11 +116,14 @@
String name = handler.getBeanName();
if(name != null) {
if(name.equals(RequestMapper.GLOBAL_PRE_REQUEST_HANDLER)) {
-
+ LOGGER.info("Registering Global-pre request handler:" +
+ handler);
mapper.addGlobalPreRequestHandler(handler);
} else if(name.equals(RequestMapper.GLOBAL_POST_REQUEST_HANDLER)) {
+ LOGGER.info("Registering Global-post request handler:" +
+ handler);
mapper.addGlobalPostRequestHandler(handler);
} else {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3097
http://archive-access.svn.sourceforge.net/archive-access/?rev=3097&view=rev
Author: bradtofel
Date: 2010-05-17 19:44:24 +0000 (Mon, 17 May 2010)
Log Message:
-----------
TWEAK: added tests for resolving empty path
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-17 19:37:09 UTC (rev 3096)
+++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-17 19:44:24 UTC (rev 3097)
@@ -115,7 +115,11 @@
UrlOperations.resolveUrl(scheme + "a.org/3/","1/2"));
assertEquals(scheme + "a.org/1/2",
- UrlOperations.resolveUrl(scheme + "a.org/3","1/2"));
+ UrlOperations.resolveUrl(scheme + "a.org/3","1/2"));
+ assertEquals(scheme + "a.org/3",
+ UrlOperations.resolveUrl(scheme + "a.org/3",""));
+ assertEquals(scheme + "a.org/3.html",
+ UrlOperations.resolveUrl(scheme + "a.org/3.html",""));
}
}
public void testUrlToScheme() {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3096
http://archive-access.svn.sourceforge.net/archive-access/?rev=3096&view=rev
Author: bradtofel
Date: 2010-05-17 19:37:09 +0000 (Mon, 17 May 2010)
Log Message:
-----------
BUGFIX(unreported): now adds 'http' to request URL if scheme is missing.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-05-17 19:33:41 UTC (rev 3095)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-05-17 19:37:09 UTC (rev 3096)
@@ -32,6 +32,7 @@
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.util.Timestamp;
+import org.archive.wayback.util.url.UrlOperations;
import org.archive.wayback.webapp.AccessPoint;
/**
@@ -89,6 +90,12 @@
}
// just jam everything else in:
String val = AccessPoint.getMapParam(queryMap,key);
+ if(key.equals(WaybackRequest.REQUEST_URL)) {
+ String scheme = UrlOperations.urlToScheme(val);
+ if(scheme == null) {
+ val = UrlOperations.HTTP_SCHEME + val;
+ }
+ }
wbRequest.put(key,val);
}
String partialTS = wbRequest.getReplayTimestamp();
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3095
http://archive-access.svn.sourceforge.net/archive-access/?rev=3095&view=rev
Author: bradtofel
Date: 2010-05-17 19:33:41 +0000 (Mon, 17 May 2010)
Log Message:
-----------
FEATURE: now uses resource code to auto-detect and unchunkify chunked content. also does not forward on some hop-by-hop HTTP headers
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-05-17 19:28:20 UTC (rev 3094)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-05-17 19:33:41 UTC (rev 3095)
@@ -26,19 +26,24 @@
package org.archive.wayback.liveweb;
import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
import java.util.zip.GZIPInputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
-import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.log4j.Logger;
import org.archive.io.arc.ARCRecord;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.exception.ResourceNotAvailableException;
+import org.archive.wayback.resourcestore.resourcefile.ResourceFactory;
import org.archive.wayback.util.ByteOp;
import org.archive.wayback.util.webapp.AbstractRequestHandler;
@@ -56,6 +61,8 @@
*/
public class ARCUnwrappingProxy extends AbstractRequestHandler {
+ private static final Logger LOGGER =
+ Logger.getLogger(ARCUnwrappingProxy.class.getName());
private MultiThreadedHttpConnectionManager connectionManager = null;
private HostConfiguration hostConfiguration = null;
/**
@@ -75,7 +82,6 @@
sb.append("?").append(query);
}
HttpMethod method = new GetMethod(sb.toString());
-// method.addRequestHeader("User-Agent", userAgent);
boolean got200 = false;
try {
HttpClient http = new HttpClient(connectionManager);
@@ -87,14 +93,28 @@
new ARCRecord(new GZIPInputStream(
method.getResponseBodyAsStream()),
"id",0L,false,false,true);
- r.skipHttpHeader();
- httpResponse.setStatus(r.getStatusCode());
- Header headers[] = r.getHttpHeaders();
- for(Header header : headers) {
- httpResponse.addHeader(header.getName(), header.getValue());
+ Resource res = null;
+ try {
+ res = ResourceFactory.ARCArchiveRecordToResource(r, null);
+ } catch (ResourceNotAvailableException e) {
+ LOGGER.error(e);
+ throw new IOException(e);
+ }
+ httpResponse.setStatus(res.getStatusCode());
+
+ Map<String,String> headers = res.getHttpHeaders();
+ Iterator<String> keys = headers.keySet().iterator();
+ while(keys.hasNext()) {
+ String key = keys.next();
+ if(!key.equalsIgnoreCase("Connection")
+ && !key.equalsIgnoreCase("Content-Length")
+ && !key.equalsIgnoreCase("Transfer-Encoding")) {
+ String value = headers.get(key);
+ httpResponse.addHeader(key, value);
+ }
}
- ByteOp.copyStream(r, httpResponse.getOutputStream());
+ ByteOp.copyStream(res, httpResponse.getOutputStream());
got200 = true;
}
} finally {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-17 19:28:26
|
Revision: 3094
http://archive-access.svn.sourceforge.net/archive-access/?rev=3094&view=rev
Author: bradtofel
Date: 2010-05-17 19:28:20 +0000 (Mon, 17 May 2010)
Log Message:
-----------
BUGFIX: now allows spaces(' ') after the chunk hex byte length when detecting chunked contents.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-05-17 19:27:12 UTC (rev 3093)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-05-17 19:28:20 UTC (rev 3094)
@@ -115,7 +115,8 @@
// better be a hex character:
if(isHex(nextC)) {
hexFound++;
- } else {
+ } else if(nextC != ' ') {
+ // allow whitespace before or after chunk...
// not a hex digit: not a chunked stream.
break;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3093
http://archive-access.svn.sourceforge.net/archive-access/?rev=3093&view=rev
Author: bradtofel
Date: 2010-05-17 19:27:12 +0000 (Mon, 17 May 2010)
Log Message:
-----------
INITIAL REV: leaner default implementation of current server-side rewrite rules.
Added Paths:
-----------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2010-05-17 19:27:12 UTC (rev 3093)
@@ -0,0 +1,346 @@
+/* FastArchivalUrlReplayParseEventHandler
+ *
+ * $Id$:
+ *
+ * Created on May 4, 2010.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.archivalurl;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+
+import javax.servlet.ServletException;
+
+import org.archive.wayback.replay.html.ReplayParseContext;
+import org.archive.wayback.replay.html.StringTransformer;
+import org.archive.wayback.replay.html.transformer.BlockCSSStringTransformer;
+import org.archive.wayback.replay.html.transformer.InlineCSSStringTransformer;
+import org.archive.wayback.replay.html.transformer.JSStringTransformer;
+import org.archive.wayback.replay.html.transformer.MetaRefreshUrlStringTransformer;
+import org.archive.wayback.replay.html.transformer.URLStringTransformer;
+import org.archive.wayback.util.htmllex.NodeUtils;
+import org.archive.wayback.util.htmllex.ParseContext;
+import org.archive.wayback.util.htmllex.ParseEventHandler;
+import org.htmlparser.Node;
+import org.htmlparser.nodes.TagNode;
+import org.htmlparser.nodes.TextNode;
+
+/**
+ * Lean and mean ParseEventHandler implementing current best-known server-side
+ * HTML rewrite rules, and should be much faster than the fully configurable
+ * version.
+ *
+ * @author brad
+ *
+ */
+public class FastArchivalUrlReplayParseEventHandler implements
+ ParseEventHandler {
+
+ private final static String FERRET_DONE_KEY =
+ FastArchivalUrlReplayParseEventHandler.class.toString();
+
+ private String jspInsertPath = "/WEB-INF/replay/DisclaimChooser.jsp";
+
+ private final String[] okHeadTags = { "!DOCTYPE", "HTML", "HEAD", "BASE",
+ "LINK", "META", "TITLE", "STYLE", "SCRIPT", "BODY" };
+ private HashMap<String, Object> okHeadTagMap = null;
+ private final static String FRAMESET_TAG = "FRAMESET";
+ private final static String BODY_TAG = "BODY";
+
+ private static BlockCSSStringTransformer cssBlockTrans =
+ new BlockCSSStringTransformer();
+ private static InlineCSSStringTransformer cssInlineTrans =
+ new InlineCSSStringTransformer();
+ private static JSStringTransformer jsBlockTrans =
+ new JSStringTransformer();
+ private static MetaRefreshUrlStringTransformer metaRefreshTrans =
+ new MetaRefreshUrlStringTransformer();
+ private static URLStringTransformer anchorUrlTrans =
+ new URLStringTransformer();
+ private static URLStringTransformer cssUrlTrans =
+ new URLStringTransformer("cs_");
+ private static URLStringTransformer jsUrlTrans =
+ new URLStringTransformer("js_");
+ private static URLStringTransformer imageUrlTrans =
+ new URLStringTransformer("im_");
+
+ /** Constructor... */
+ public FastArchivalUrlReplayParseEventHandler() {
+ okHeadTagMap = new HashMap<String, Object>(okHeadTags.length);
+ for (String tag : okHeadTags) {
+ okHeadTagMap.put(tag, null);
+ }
+ }
+
+ // TODO: This should all be refactored up into an abstract base class with
+ // default no-op methods, allowing a subclass to only override the ones they
+ // want...
+ public void handleNode(ParseContext pContext, Node node)
+ throws IOException {
+ ReplayParseContext context = (ReplayParseContext) pContext;
+ if(NodeUtils.isRemarkNode(node)) {
+// RemarkNode remarkNode = (RemarkNode) node;
+// handleRemarkTextNode(context,remarkNode);
+ emit(context,null,node,null);
+
+ } else if(NodeUtils.isTextNode(node)) {
+ TextNode textNode = (TextNode) node;
+ if(context.isInCSS()) {
+ handleCSSTextNode(context,textNode);
+
+ } else if(context.isInScriptText()) {
+ handleJSTextNode(context,textNode);
+ } else {
+ emit(context,null,textNode,null);
+// handleContentTextNode(context,textNode);
+ }
+ } else if(NodeUtils.isTagNode(node)) {
+ TagNode tagNode = (TagNode) node;
+ if(tagNode.isEndTag()) {
+ emit(context,null,tagNode,null);
+// handleCloseTagNode(context,tagNode);
+ } else {
+ // assume start, possibly empty:
+ handleOpenTagNode(context,tagNode);
+ }
+ } else {
+ throw new IllegalArgumentException("Unknown node type..");
+ }
+ }
+
+ /**
+ * @param context
+ * @param textNode
+ * @throws IOException
+ */
+ private void handleCSSTextNode(ReplayParseContext context, TextNode textNode) throws IOException {
+ textNode.setText(cssBlockTrans.transform(context, textNode.getText()));
+ emit(context,null,textNode,null);
+ }
+ /**
+ * @param context
+ * @param textNode
+ * @throws IOException
+ */
+ private void handleJSTextNode(ReplayParseContext context, TextNode textNode) throws IOException {
+ textNode.setText(jsBlockTrans.transform(context, textNode.getText()));
+ emit(context,null,textNode,null);
+ }
+
+ private void handleOpenTagNode(ReplayParseContext context, TagNode tagNode)
+ throws IOException {
+
+ boolean insertedJsp = context.getData(FERRET_DONE_KEY) != null;
+ String preEmit = null;
+ String postEmit = null;
+
+ String tagName = tagNode.getTagName();
+ // Time to insert the JSP header?
+ if(!insertedJsp) {
+ if(!okHeadTagMap.containsKey(tagName)) {
+ if(tagName.equals(FRAMESET_TAG)) {
+ // don't put the insert in framesets:
+ } else {
+ String tmp = null;
+ try {
+ tmp =
+ context.getJspExec().jspToString(jspInsertPath);
+ } catch (ServletException e) {
+ e.printStackTrace();
+ }
+ if (tagName.equals(BODY_TAG)) {
+ // insert it now, *after* the current Tag:
+ postEmit = tmp;
+ } else {
+ // hrm... we are seeing a node that should be in
+ // the body.. lets emit the jsp now, *before*
+ // the current Tag:
+ preEmit = tmp;
+ }
+ }
+ context.putData(FERRET_DONE_KEY,"");
+ }
+ }
+ // now do all the usual attribute rewriting:
+ // this could be slightly optimized by moving tags more likely to occur
+ // to the front of the if/else if/else if routing...
+
+ if(tagName.equals("A")) {
+ transformAttr(context, tagNode, "HREF", anchorUrlTrans);
+
+ } else if(tagName.equals("APPLET")) {
+ transformAttr(context, tagNode, "CODEBASE", anchorUrlTrans);
+ transformAttr(context, tagNode, "ARCHIVE", anchorUrlTrans);
+
+ } else if(tagName.equals("AREA")) {
+ transformAttr(context, tagNode, "HREF", anchorUrlTrans);
+
+ } else if(tagName.equals("BASE")) {
+ String orig = tagNode.getAttribute("HREF");
+ if(orig != null) {
+ try {
+ context.setBaseUrl(new URL(orig));
+ } catch (MalformedURLException e) {
+ e.printStackTrace();
+ }
+ }
+
+ } else if(tagName.equals("EMBED")) {
+ transformAttr(context, tagNode, "SRC", anchorUrlTrans);
+
+ } else if(tagName.equals("IFRAME")) {
+ transformAttr(context, tagNode, "SRC", anchorUrlTrans);
+
+ } else if(tagName.equals("IMG")) {
+ transformAttr(context, tagNode, "SRC", imageUrlTrans);
+
+ } else if(tagName.equals("INPUT")) {
+ transformAttr(context, tagNode, "SRC", imageUrlTrans);
+
+ } else if(tagName.equals("FORM")) {
+ transformAttr(context, tagNode, "ACTION", anchorUrlTrans);
+
+ } else if(tagName.equals("FRAME")) {
+ transformAttr(context, tagNode, "SRC", anchorUrlTrans);
+
+ } else if(tagName.equals("LINK")) {
+ if(transformAttrWhere(context, tagNode, "REL", "STYLESHEET",
+ "HREF",cssUrlTrans)) {
+ // no-op
+ } else if(transformAttrWhere(context,tagNode,"REL","SHORTCUT ICON",
+ "HREF", imageUrlTrans)) {
+ // no-op
+ } else {
+ transformAttr(context, tagNode, "HREF", anchorUrlTrans);
+ }
+
+ } else if(tagName.equals("META")) {
+ transformAttrWhere(context, tagNode, "HTTP-EQUIV", "REFRESH",
+ "CONTENT", metaRefreshTrans);
+ transformAttr(context, tagNode, "URL", anchorUrlTrans);
+
+ } else if(tagName.equals("OBJECT")) {
+ transformAttr(context, tagNode, "CODEBASE", anchorUrlTrans);
+ transformAttr(context, tagNode, "CDATA", anchorUrlTrans);
+
+ } else if(tagName.equals("SCRIPT")) {
+ transformAttr(context, tagNode, "SRC", jsUrlTrans);
+ }
+ // now, for *all* tags...
+ transformAttr(context,tagNode,"BACKGROUND", imageUrlTrans);
+ transformAttr(context,tagNode,"STYLE", cssInlineTrans);
+ transformAttr(context,tagNode,"onclick", jsBlockTrans);
+
+ emit(context,preEmit,tagNode,postEmit);
+ }
+
+ private void emit(ReplayParseContext context, String pre, Node node,
+ String post) throws IOException {
+
+ OutputStream out = context.getOutputStream();
+ if(out != null) {
+ Charset charset = Charset.forName(context.getOutputCharset());
+
+ if(pre != null) {
+
+ out.write(pre.getBytes(charset));
+ }
+
+ out.write(node.toHtml(true).getBytes(charset));
+
+ if(post != null) {
+
+ out.write(post.getBytes(charset));
+ }
+ }
+ }
+
+ /**
+ * Transform a particular attribute on a TagNode, if that TagNode has a
+ * previous value for the updated attribute, AND if that TagNode contains
+ * another named attribute with a specific value.
+ *
+ * @param context the ReplayParseContext
+ * @param node the TagNode to be updated
+ * @param attrName update only occurs if the TagNode has an attribute with
+ * this name.
+ * @param attrVal update only occurs if the TagNode's attribute
+ * attrName has this value, case insensitive. In fact as an optimization,
+ * it is ASSUMED that this argument is already UPPER-CASED
+ * @param modAttr the name of the attribute to update
+ * @param transformer the StringTransformer responsible for creating the
+ * new value based on the old one.
+ * @return true if the attribute was updated.
+ */
+ private boolean transformAttrWhere(ReplayParseContext context, TagNode node,
+ String attrName, String attrVal, String modAttr,
+ StringTransformer transformer) {
+ String val = node.getAttribute(attrName);
+ if(val != null) {
+ if(val.toUpperCase().equals(attrVal)) {
+ return transformAttr(context,node,modAttr,transformer);
+ }
+ }
+ return false;
+ }
+ /**
+ * Transform a particular attribute on a TagNode, iff that attribute exists
+ *
+ * @param context The ReplayParseContext being transformed
+ * @param node the TagNode to update
+ * @param attr the attribute name to transform
+ * @param transformer the StringTransformer responsible for creating the
+ * new value
+ * @return true if the attribute was found and updated
+ */
+ private boolean transformAttr(ReplayParseContext context, TagNode node,
+ String attr, StringTransformer transformer) {
+ String orig = node.getAttribute(attr);
+ if(orig != null) {
+ node.setAttribute(attr,
+ transformer.transform(context, orig));
+ return true;
+ }
+ return false;
+ }
+ public void handleParseComplete(ParseContext context) throws IOException {
+ // Nothing to do.
+ }
+
+ /**
+ * @return the jspInsertPath
+ */
+ public String getJspInsertPath() {
+ return jspInsertPath;
+ }
+
+ /**
+ * @param jspInsertPath the jspInsertPath to set
+ */
+ public void setJspInsertPath(String jspInsertPath) {
+ this.jspInsertPath = jspInsertPath;
+ }
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3092
http://archive-access.svn.sourceforge.net/archive-access/?rev=3092&view=rev
Author: bradtofel
Date: 2010-05-17 19:26:12 +0000 (Mon, 17 May 2010)
Log Message:
-----------
TWEAK: changed declared event delegator to ParseEventHandler
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2010-05-14 22:52:50 UTC (rev 3091)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2010-05-17 19:26:12 UTC (rev 3092)
@@ -44,11 +44,12 @@
import org.archive.wayback.replay.HttpHeaderOperation;
import org.archive.wayback.replay.HttpHeaderProcessor;
import org.archive.wayback.replay.JSPExecutor;
+import org.archive.wayback.replay.TextReplayRenderer;
import org.archive.wayback.replay.charset.CharsetDetector;
import org.archive.wayback.replay.charset.StandardCharsetDetector;
-import org.archive.wayback.replay.html.ReplayParseEventDelegator;
import org.archive.wayback.replay.html.ReplayParseContext;
import org.archive.wayback.util.htmllex.ContextAwareLexer;
+import org.archive.wayback.util.htmllex.ParseEventHandler;
import org.htmlparser.Node;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
@@ -62,7 +63,7 @@
*
*/
public class ArchivalUrlSAXRewriteReplayRenderer implements ReplayRenderer {
- private ReplayParseEventDelegator delegator = null;
+ private ParseEventHandler delegator = null;
private HttpHeaderProcessor httpHeaderProcessor;
private CharsetDetector charsetDetector = new StandardCharsetDetector();
private final static String OUTPUT_CHARSET = "utf-8";
@@ -142,7 +143,7 @@
// set the corrected length:
headers.put(HttpHeaderOperation.HTTP_LENGTH_HEADER,
String.valueOf(utf8Bytes.length));
- headers.put("X-Wayback-Guessed-Charset", charSet);
+ headers.put(TextReplayRenderer.GUESSED_CHARSET_HEADER, charSet);
// send back the headers:
HttpHeaderOperation.sendHeaders(headers, httpResponse);
@@ -173,14 +174,14 @@
/**
* @return the delegator
*/
- public ReplayParseEventDelegator getDelegator() {
+ public ParseEventHandler getDelegator() {
return delegator;
}
/**
* @param delegator the delegator to set
*/
- public void setDelegator(ReplayParseEventDelegator delegator) {
+ public void setDelegator(ParseEventHandler delegator) {
this.delegator = delegator;
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-14 22:52:56
|
Revision: 3091
http://archive-access.svn.sourceforge.net/archive-access/?rev=3091&view=rev
Author: bradtofel
Date: 2010-05-14 22:52:50 +0000 (Fri, 14 May 2010)
Log Message:
-----------
FEATURE: added discardStream() method
JAVADOC
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2010-05-13 18:34:37 UTC (rev 3090)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2010-05-14 22:52:50 UTC (rev 3091)
@@ -28,34 +28,100 @@
import java.io.InputStream;
import java.io.OutputStream;
+/**
+ * Byte oriented static methods. Likely a lot of overlap with apache-commons
+ * stuff - eventually should be reconciled.
+ *
+ * @author brad
+ *
+ */
public class ByteOp {
+ /** Default buffer size for IO ops */
public final static int BUFFER_SIZE = 4096;
+ /**
+ * Create a new byte array with contents initialized to values from the
+ * argument byte array.
+ * @param src source byte array of initial values
+ * @param offset start offset to copy bytes
+ * @param length number of bytes to copy
+ * @return a new byte array of size length, containing values from src
+ * starting from offset in the src array.
+ */
public static byte[] copy(byte[] src, int offset, int length) {
byte[] copy = new byte[length];
System.arraycopy(src, offset, copy, 0, length);
return copy;
}
- public static boolean cmp(byte[] input, byte[] want) {
- if(input.length != want.length) {
+
+ /**
+ * Compare two byte arrays
+ * @param a byte array to compare
+ * @param b byte array to compare
+ * @return true if a and b have same length, and all the same values, false
+ * otherwise
+ */
+ public static boolean cmp(byte[] a, byte[] b) {
+ if(a.length != b.length) {
return false;
}
- for(int i = 0; i < input.length; i++) {
- if(input[i] != want[i]) {
+ for(int i = 0; i < a.length; i++) {
+ if(a[i] != b[i]) {
return false;
}
}
return true;
}
+ /**
+ * throw away all bytes from stream argument
+ * @param is InputStream to read and discard
+ * @throws IOException when is throws one
+ */
public static void discardStream(InputStream is) throws IOException {
discardStream(is,BUFFER_SIZE);
}
+
+ /**
+ * throw away all bytes from stream argument
+ * @param is InputStream to read and discard
+ * @param size number of bytes to read at once from the stream
+ * @throws IOException when is throws one
+ */
public static void discardStream(InputStream is,int size) throws IOException {
byte[] buffer = new byte[size];
while(is.read(buffer, 0, size) != -1) {
}
}
+
+ /**
+ * throw away all bytes from stream argument, and count how many bytes were
+ * discarded before reaching the end of the stream.
+ * @param is InputStream to read and discard
+ * @return the number of bytes discarded
+ * @throws IOException when is throws one
+ */
+ public static long discardStreamCount(InputStream is) throws IOException {
+ return discardStreamCount(is, BUFFER_SIZE);
+ }
+
+ /**
+ * throw away all bytes from stream argument, and count how many bytes were
+ * discarded before reaching the end of the stream.
+ * @param is InputStream to read and discard
+ * @param size number of bytes to read at once from the stream
+ * @return the number of bytes discarded
+ * @throws IOException when is throws one
+ */
+ public static long discardStreamCount(InputStream is,int size) throws IOException {
+ long count = 0;
+ byte[] buffer = new byte[size];
+ int amt = 0;
+ while((amt = is.read(buffer, 0, size)) != -1) {
+ count += amt;
+ }
+ return count;
+ }
/**
* Write all bytes from is to os. Does not close either stream.
@@ -67,6 +133,7 @@
throws IOException {
copyStream(is,os,BUFFER_SIZE);
}
+
/**
* Write all bytes from is to os. Does not close either stream.
* @param is to copy bytes from
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-13 18:34:44
|
Revision: 3090
http://archive-access.svn.sourceforge.net/archive-access/?rev=3090&view=rev
Author: bradtofel
Date: 2010-05-13 18:34:37 +0000 (Thu, 13 May 2010)
Log Message:
-----------
FEATURE: added new method stripDefaultPort() and tests for that
TWEAK: Updated TLD list
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-05-07 23:11:24 UTC (rev 3089)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-05-13 18:34:37 UTC (rev 3090)
@@ -101,31 +101,29 @@
*/
public final static char PATH_START = '/';
-
- private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" +
- "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" +
- "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" +
- "|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo" +
- "|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk" +
- "|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg" +
- "|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma" +
- "|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz" +
- "|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm" +
- "|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj" +
- "|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn" +
- "|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu" +
- "|wf|ws|ye|yt|yu|za|zm|zw";
-
- private static final String GEN_TLDS = "aero|biz|cat|com|coop|edu|gov" +
- "|info|int|jobs|mil|mobi|museum|name|net|org|pro|travel";
-
-
- private static final String ALL_TLD_PATTERN = CC_TLDS + "|" + GEN_TLDS;
+ private static final String ALL_TLDS = "ac|ad|ae|aero|af|ag|ai|al|am|an" +
+ "|ao|aq|ar|arpa|as|asia|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi" +
+ "|biz|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci" +
+ "|ck|cl|cm|cn|co|com|coop|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec" +
+ "|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh" +
+ "|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id" +
+ "|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh" +
+ "|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc" +
+ "|md|me|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum" +
+ "|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nu|nz" +
+ "|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pro|ps|pt|pw|py|qa|re|ro" +
+ "|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv" +
+ "|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv" +
+ "|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d" +
+ "|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad" +
+ "|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp" +
+ "|xn--kgbechtv|xn--mgbaam7a8h|xn--mgberp4a5d4ar|xn--p1ai" +
+ "|xn--wgbh1c|xn--zckzah|ye|yt|za|zm|zw";
private static final String IP_PATTERN = "[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+";
private static final Pattern AUTHORITY_REGEX =
- Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLD_PATTERN + "))|" +
+ Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLDS + "))|" +
"(" + IP_PATTERN + ")");
// private static final Pattern AUTHORITY_REGEX_SIMPLE =
@@ -244,7 +242,48 @@
return url.substring(pathIdx);
}
}
+
+ /**
+ * Attempt to strip default ports out of URL strings.
+ * @param url the original URL possibly including a port
+ * @return the URL sans port, if the scheme was recognized and the default
+ * port was supplied, otherwise, the original URL.
+ */
+ public static String stripDefaultPortFromUrl(String url) {
+ String scheme = urlToScheme(url);
+ if(scheme == null) {
+ return url;
+ }
+ int defaultPort = schemeToDefaultPort(scheme);
+ if(defaultPort == -1) {
+ return url;
+ }
+ String portStr = null;
+ // is there a slash after the scheme?
+ int slashIdx = url.indexOf('/', scheme.length());
+ if(slashIdx == -1) {
+ portStr = String.format(":%d", defaultPort);
+ if(url.endsWith(portStr)) {
+ return url.substring(0,url.length() - portStr.length());
+ }
+ }
+ portStr = String.format(":%d/", defaultPort);
+ int idx = url.indexOf(portStr);
+ if(idx == -1) {
+ return url;
+ }
+ // if that occurred before the first / (after the scheme) then strip it:
+ if(slashIdx < idx) {
+ return url;
+ }
+ // we want to strip out the portStr:
+ StringBuilder sb = new StringBuilder(url.length());
+ sb.append(url.substring(0,idx));
+ sb.append(url.substring(idx + (portStr.length()-1)));
+ return sb.toString();
+ }
+
/**
* Attempt to extract the hostname component of an absolute URL argument.
* @param url the url String from which to extract the hostname
Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-07 23:11:24 UTC (rev 3089)
+++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-13 18:34:37 UTC (rev 3090)
@@ -161,4 +161,35 @@
}
+
+ public void testStripDefaultPort() {
+ assertSDP("http://foo.com/","http://foo.com/");
+ assertSDP("http://foo.com","http://foo.com");
+ assertSDP("http://foo.com","http://foo.com:80");
+ assertSDP("foo.com:80/","foo.com:80/");
+ assertSDP("http://foo.com:8080/","http://foo.com:8080/");
+ assertSDP("http://foo.com:8081/","http://foo.com:8081/");
+ assertSDP("https://foo.com:8081/","https://foo.com:8081/");
+ assertSDP("https://foo.com/","https://foo.com:443/");
+ assertSDP("https://foo.com","https://foo.com:443");
+ assertSDP("ftp://foo.com/","ftp://foo.com/");
+ assertSDP("ftp://foo.com","ftp://foo.com");
+ assertSDP("ftp://foo.com:1234","ftp://foo.com:1234");
+ assertSDP("ftp://foo.com","ftp://foo.com:21");
+ assertSDP("ftp://foo.com/","ftp://foo.com:21/");
+ assertSDP("ftp://foo.com/bla","ftp://foo.com:21/bla");
+ assertSDP("s3://foo.com/","s3://foo.com/");
+ assertSDP("s3://foo.com/bar","s3://foo.com/bar");
+ assertSDP("s3://foo.com:80/bar","s3://foo.com:80/bar");
+ assertSDP("http://b@foo.com/bar","http://b@foo.com:80/bar");
+ assertSDP("http://b@foo.com/bar","http://b@foo.com/bar");
+ assertSDP("http://b:80...@fo.../bar","http://b:80...@fo.../bar");
+ assertSDP("http://b:80...@fo.../bar","http://b:80...@fo...:80/bar");
+ assertSDP("http://b:80...@fo...:8080/ba","http://b:80...@fo...:8080/ba");
+ }
+ private void assertSDP(String want, String orig) {
+ String got = UrlOperations.stripDefaultPortFromUrl(orig);
+ assertEquals(want,got);
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-05-07 23:11:33
|
Revision: 3089
http://archive-access.svn.sourceforge.net/archive-access/?rev=3089&view=rev
Author: bradtofel
Date: 2010-05-07 23:11:24 +0000 (Fri, 07 May 2010)
Log Message:
-----------
BUGFIX: was setting path prefix to "//" for requests to "/"
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java 2010-05-07 23:10:16 UTC (rev 3088)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java 2010-05-07 23:11:24 UTC (rev 3089)
@@ -148,7 +148,7 @@
portMapper.addRequestHandler(host, path, requestHandler);
}
- private RequestHandlerContext mapRequest(HttpServletRequest request) {
+ public RequestHandlerContext mapRequest(HttpServletRequest request) {
RequestHandlerContext handlerContext = null;
int port = request.getLocalPort();
@@ -181,8 +181,12 @@
if(handlerContext != null) {
RequestHandler requestHandler =
handlerContext.getRequestHandler();
- request.setAttribute(REQUEST_CONTEXT_PREFIX,
- handlerContext.getPathPrefix() + "/");
+ // need to add trailing "/" iff prefix is not "/":
+ String pathPrefix = handlerContext.getPathPrefix();
+ if(!pathPrefix.equals("/")) {
+ pathPrefix += "/";
+ }
+ request.setAttribute(REQUEST_CONTEXT_PREFIX,pathPrefix);
handled = requestHandler.handleRequest(request, response);
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3088
http://archive-access.svn.sourceforge.net/archive-access/?rev=3088&view=rev
Author: bradtofel
Date: 2010-05-07 23:10:16 +0000 (Fri, 07 May 2010)
Log Message:
-----------
FEATURE: now by default, blocks "alexa/dat" MIME records which may have gotten into the index...
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java 2010-05-07 23:09:30 UTC (rev 3087)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java 2010-05-07 23:10:16 UTC (rev 3088)
@@ -32,6 +32,7 @@
import org.archive.wayback.resourceindex.filters.ConditionalGetAnnotationFilter;
import org.archive.wayback.resourceindex.filters.DuplicateRecordFilter;
import org.archive.wayback.resourceindex.filters.GuardRailFilter;
+import org.archive.wayback.resourceindex.filters.MimeTypeFilter;
import org.archive.wayback.resourceindex.filters.UserInfoInAuthorityFilter;
import org.archive.wayback.resourceindex.filters.WARCRevisitAnnotationFilter;
import org.archive.wayback.util.ObjectFilter;
@@ -39,6 +40,8 @@
public class CoreCaptureFilterGroup implements CaptureFilterGroup {
private ObjectFilterChain<CaptureSearchResult> chain = null;
+ private MimeTypeFilter mimeExcludeFilter = new MimeTypeFilter();
+ private static String ALEXA_DAT_MIME = "alexa/dat";
public CoreCaptureFilterGroup(LocalResourceIndex index) {
chain = new ObjectFilterChain<CaptureSearchResult>();
@@ -48,7 +51,11 @@
chain.addFilter(new WARCRevisitAnnotationFilter());
chain.addFilter(new ConditionalGetAnnotationFilter());
}
+ MimeTypeFilter mimeExcludeFilter = new MimeTypeFilter();
+ mimeExcludeFilter.addMime(ALEXA_DAT_MIME);
+ mimeExcludeFilter.setIncludeIfContains(false);
chain.addFilter(new UserInfoInAuthorityFilter());
+ chain.addFilter(mimeExcludeFilter);
}
public List<ObjectFilter<CaptureSearchResult>> getFilters() {
return chain.getFilters();
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3087
http://archive-access.svn.sourceforge.net/archive-access/?rev=3087&view=rev
Author: bradtofel
Date: 2010-05-07 23:09:30 +0000 (Fri, 07 May 2010)
Log Message:
-----------
FEATURE: added an inverting "includeIfContains" setting, which allows blocking of specific MIME-TYPEs from the index
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java 2010-05-07 23:07:58 UTC (rev 3086)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java 2010-05-07 23:09:30 UTC (rev 3087)
@@ -38,6 +38,7 @@
*/
public class MimeTypeFilter implements ObjectFilter<CaptureSearchResult> {
private HashMap<String,Integer> validMimes = null;
+ private boolean includeIfContains = true;
/**
* @param mime String which is valid match for mime-type field
@@ -46,7 +47,7 @@
if(validMimes == null) {
validMimes = new HashMap<String, Integer>();
}
- validMimes.put(mime.toLowerCase(),new Integer(1));
+ validMimes.put(mime.toLowerCase(),null);
}
/* (non-Javadoc)
@@ -54,6 +55,21 @@
*/
public int filterObject(CaptureSearchResult r) {
String mime = r.getMimeType().toLowerCase();
- return validMimes.containsKey(mime) ? FILTER_INCLUDE : FILTER_EXCLUDE;
+ return validMimes.containsKey(mime) == includeIfContains ?
+ FILTER_INCLUDE : FILTER_EXCLUDE;
}
+
+ /**
+ * @return the includeIfContains
+ */
+ public boolean isIncludeIfContains() {
+ return includeIfContains;
+ }
+
+ /**
+ * @param includeIfContains the includeIfContains to set
+ */
+ public void setIncludeIfContains(boolean includeIfContains) {
+ this.includeIfContains = includeIfContains;
+ }
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3086
http://archive-access.svn.sourceforge.net/archive-access/?rev=3086&view=rev
Author: bradtofel
Date: 2010-05-07 23:07:58 +0000 (Fri, 07 May 2010)
Log Message:
-----------
JAVADOC
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-05-07 23:04:35 UTC (rev 3085)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-05-07 23:07:58 UTC (rev 3086)
@@ -27,12 +27,19 @@
import org.archive.wayback.replay.html.ReplayParseContext;
import org.archive.wayback.replay.html.StringTransformer;
+/**
+ * @author brad
+ *
+ */
public class URLStringTransformer implements StringTransformer {
private static final String MAILTO_PREFIX = "mailto:";
private String flags;
- public URLStringTransformer() {
-
- }
+ /** Default constructor */
+ public URLStringTransformer() {}
+ /**
+ * Flag-setting constructor
+ * @param flags flags to pass to ReplayParseContext.contextualizeUrl()
+ */
public URLStringTransformer(String flags) {
this.flags = flags;
}
@@ -44,16 +51,12 @@
return context.contextualizeUrl(url, flags);
}
- /**
- * @return the flags
- */
+ /** @return the flags */
public String getFlags() {
return flags;
}
- /**
- * @param flags the flags to set
- */
+ /** @param flags the flags to set */
public void setFlags(String flags) {
this.flags = flags;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3085
http://archive-access.svn.sourceforge.net/archive-access/?rev=3085&view=rev
Author: bradtofel
Date: 2010-05-07 23:04:35 +0000 (Fri, 07 May 2010)
Log Message:
-----------
TWEAK: added constructor which allows flags to be set at construction time
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-04-30 21:15:45 UTC (rev 3084)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-05-07 23:04:35 UTC (rev 3085)
@@ -30,6 +30,12 @@
public class URLStringTransformer implements StringTransformer {
private static final String MAILTO_PREFIX = "mailto:";
private String flags;
+ public URLStringTransformer() {
+
+ }
+ public URLStringTransformer(String flags) {
+ this.flags = flags;
+ }
public String transform(ReplayParseContext context, String url) {
if(url.startsWith(MAILTO_PREFIX)) {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-04-30 21:15:53
|
Revision: 3084
http://archive-access.svn.sourceforge.net/archive-access/?rev=3084&view=rev
Author: bradtofel
Date: 2010-04-30 21:15:45 +0000 (Fri, 30 Apr 2010)
Log Message:
-----------
FEATURE: made HTTP Header guessedCharsetHeader field bean configurable, and changed its default value.
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-04-30 02:16:27 UTC (rev 3083)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-04-30 21:15:45 UTC (rev 3084)
@@ -50,6 +50,9 @@
*/
public abstract class TextReplayRenderer implements ReplayRenderer {
+ private static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset";
+
+ private String guessedCharsetHeader = GUESSED_CHARSET_HEADER;
private List<String> jspInserts = null;
private HttpHeaderProcessor httpHeaderProcessor;
private CharsetDetector charsetDetector = new StandardCharsetDetector();
@@ -89,16 +92,20 @@
// set the corrected length:
int bytes = page.getBytes().length;
+
headers.put(HttpHeaderOperation.HTTP_LENGTH_HEADER, String.valueOf(bytes));
+ if(guessedCharsetHeader != null) {
+ headers.put(guessedCharsetHeader, page.getCharSet());
+ }
+
+ // send back the headers:
+ HttpHeaderOperation.sendHeaders(headers, httpResponse);
+
// Tomcat will always send a charset... It's trying to be smarter than
// we are. If the original page didn't include a "charset" as part of
// the "Content-Type" HTTP header, then Tomcat will use the default..
// who knows what that is, or what that will do to the page..
// let's try explicitly setting it to what we used:
- headers.put("X-Wayback-Guessed-Charset", page.getCharSet());
-
- // send back the headers:
- HttpHeaderOperation.sendHeaders(headers, httpResponse);
httpResponse.setCharacterEncoding(page.getCharSet());
page.writeToOutputStream(httpResponse.getOutputStream());
@@ -131,4 +138,21 @@
public void setCharsetDetector(CharsetDetector charsetDetector) {
this.charsetDetector = charsetDetector;
}
+
+ /**
+ * @return the String HTTP Header used to indicate what Wayback determined
+ * was the page's original charset
+ */
+ public String getGuessedCharsetHeader() {
+ return guessedCharsetHeader;
+ }
+
+ /**
+ * @param guessedCharsetHeader the String HTTP Header value used to indicate
+ * to clients what Wayback determined was the page's original charset. If set
+ * to null, the header will be omitted.
+ */
+ public void setGuessedCharsetHeader(String guessedCharsetHeader) {
+ this.guessedCharsetHeader = guessedCharsetHeader;
+ }
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3083
http://archive-access.svn.sourceforge.net/archive-access/?rev=3083&view=rev
Author: bradtofel
Date: 2010-04-30 02:16:27 +0000 (Fri, 30 Apr 2010)
Log Message:
-----------
TWEAK: removed System.out.format() debug line...
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-30 02:14:51 UTC (rev 3082)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-30 02:16:27 UTC (rev 3083)
@@ -62,23 +62,23 @@
private static final TimeZone TZ_UTC = TimeZone.getTimeZone("UTC");
- private static String joinInts(int[] a) {
- StringBuilder sb = new StringBuilder();
- boolean first = true;
- for(int i : a) {
- if(first) {
- sb.append(i);
- first = false;
- } else {
- sb.append(",").append(i);
- }
- }
- return sb.toString();
- }
- private static void printAr(String name, int o[], int n[]) {
- System.out.format("%s=========\nORIG(%s)\nNORM(%s)\n",
- name,joinInts(o),joinInts(n));
- }
+// private static String joinInts(int[] a) {
+// StringBuilder sb = new StringBuilder();
+// boolean first = true;
+// for(int i : a) {
+// if(first) {
+// sb.append(i);
+// first = false;
+// } else {
+// sb.append(",").append(i);
+// }
+// }
+// return sb.toString();
+// }
+// private static void printAr(String name, int o[], int n[]) {
+// System.out.format("%s=========\nORIG(%s)\nNORM(%s)\n",
+// name,joinInts(o),joinInts(n));
+// }
private static int normalizeInt(int input, int localMax, int maxOutput) {
double ln = Math.log(localMax);
@@ -108,7 +108,7 @@
if(input[i] > localMax) localMax = input[i];
}
if(localMax < max) {
- printAr("No normalization",input,input);
+// printAr("No normalization",input,input);
return input;
}
int normalized[] = new int[input.length];
@@ -123,8 +123,8 @@
double pct = iln / ln;
double num = pct * max;
int idx = (int) num;
- System.out.format("%d - %d - %f - %f - %f - %f : %d\n",
- i,input[i],ln,iln,pct,num,idx);
+// System.out.format("%d - %d - %f - %f - %f - %f : %d\n",
+// i,input[i],ln,iln,pct,num,idx);
if(input[i] < idx) {
normalized[i] = input[i];
} else {
@@ -132,7 +132,7 @@
}
}
}
- printAr("NORMALIZED",input,normalized);
+// printAr("NORMALIZED",input,normalized);
return normalized;
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
Revision: 3082
http://archive-access.svn.sourceforge.net/archive-access/?rev=3082&view=rev
Author: bradtofel
Date: 2010-04-30 02:14:51 +0000 (Fri, 30 Apr 2010)
Log Message:
-----------
TWEAK: removed System.out.format() debug line...
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-27 22:47:37 UTC (rev 3081)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-30 02:14:51 UTC (rev 3082)
@@ -92,8 +92,8 @@
double pct = iln / ln;
double num = pct * maxOutput;
int idx = (int) num;
- System.out.format("%d - %f - %f - %f - %f : %d\n",
- input,ln,iln,pct,num,idx);
+// System.out.format("%d - %f - %f - %f - %f : %d\n",
+// input,ln,iln,pct,num,idx);
if(input < idx) {
return input;
} else {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <bra...@us...> - 2010-04-27 22:47:45
|
Revision: 3081
http://archive-access.svn.sourceforge.net/archive-access/?rev=3081&view=rev
Author: bradtofel
Date: 2010-04-27 22:47:37 +0000 (Tue, 27 Apr 2010)
Log Message:
-----------
MAJOR REFACTOR of AccessPoint ServletContext linkage. Tons of code moved around, with a goal of not actually busting Wayback XML Spring configuration too badly..
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -33,13 +33,13 @@
import org.apache.commons.httpclient.URIException;
import org.apache.log4j.Logger;
-import org.archive.wayback.webapp.ServletRequestContext;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
/**
* @author brad
*
*/
-public class ARCRecordingProxy extends ServletRequestContext {
+public class ARCRecordingProxy extends AbstractRequestHandler {
private final static String EXPIRES_HEADER = "Expires";
private long expiresMS = 60 * 60 * 1000;
@@ -52,10 +52,7 @@
private ARCCacheDirectory arcCacheDir = null;
private URLtoARCCacher cacher = null;
- /* (non-Javadoc)
- * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
- */
- @Override
+
public boolean handleRequest(HttpServletRequest httpRequest,
HttpServletResponse httpResponse) throws ServletException,
IOException {
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -40,7 +40,7 @@
import org.apache.commons.httpclient.methods.GetMethod;
import org.archive.io.arc.ARCRecord;
import org.archive.wayback.util.ByteOp;
-import org.archive.wayback.webapp.ServletRequestContext;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
/**
*
@@ -54,7 +54,7 @@
* @author brad
*
*/
-public class ARCUnwrappingProxy extends ServletRequestContext {
+public class ARCUnwrappingProxy extends AbstractRequestHandler {
private MultiThreadedHttpConnectionManager connectionManager = null;
private HostConfiguration hostConfiguration = null;
@@ -66,13 +66,6 @@
hostConfiguration = new HostConfiguration();
}
-// protected HttpClient http = new HttpClient(
-// new MultiThreadedHttpConnectionManager());
-
- /* (non-Javadoc)
- * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
- */
- @Override
public boolean handleRequest(HttpServletRequest httpRequest,
HttpServletResponse httpResponse) throws ServletException,
IOException {
@@ -81,7 +74,6 @@
if(query != null) {
sb.append("?").append(query);
}
-// URL url = new URL(sb.toString());
HttpMethod method = new GetMethod(sb.toString());
// method.addRequestHeader("User-Agent", userAgent);
boolean got200 = false;
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -78,34 +78,6 @@
AccessPoint wbContext) throws BadQueryException,
BetterRequestException;
- protected static String getMapParam(Map<String,String[]> queryMap,
- String field) {
- String arr[] = queryMap.get(field);
- if (arr == null || arr.length == 0) {
- return null;
- }
- return arr[0];
- }
-
- protected static String getRequiredMapParam(Map<String,String[]> queryMap,
- String field)
- throws BadQueryException {
- String value = getMapParam(queryMap,field);
- if(value == null) {
- throw new BadQueryException("missing field " + field);
- }
- if(value.length() == 0) {
- throw new BadQueryException("empty field " + field);
- }
- return value;
- }
-
- protected static String getMapParamOrEmpty(Map<String,String[]> map,
- String param) {
- String val = getMapParam(map,param);
- return (val == null) ? "" : val;
- }
-
/**
* @return the maxRecords to use with this RequestParser, when not specified
* by the client request
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -59,7 +59,7 @@
* WaybackRequest object, except the Submit button argument.
*/
public WaybackRequest parse(HttpServletRequest httpRequest,
- AccessPoint wbContext) {
+ AccessPoint accessPoint) {
WaybackRequest wbRequest = null;
@SuppressWarnings("unchecked")
@@ -67,7 +67,7 @@
if(queryMap.size() > 0) {
wbRequest = new WaybackRequest();
- String base = wbContext.translateRequestPath(httpRequest);
+ String base = accessPoint.translateRequestPath(httpRequest);
if(base.startsWith(REPLAY_BASE)) {
wbRequest.setReplayRequest();
} else if(base.startsWith(QUERY_BASE)) {
@@ -88,7 +88,7 @@
continue;
}
// just jam everything else in:
- String val = getMapParam(queryMap,key);
+ String val = AccessPoint.getMapParam(queryMap,key);
wbRequest.put(key,val);
}
String partialTS = wbRequest.getReplayTimestamp();
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -88,7 +88,7 @@
WaybackRequest wbRequest = null;
@SuppressWarnings("unchecked")
Map<String,String[]> queryMap = httpRequest.getParameterMap();
- String query = getMapParam(queryMap, SEARCH_QUERY);
+ String query = AccessPoint.getMapParam(queryMap, SEARCH_QUERY);
if(query == null) {
return null;
}
@@ -107,8 +107,8 @@
return null;
}
- String numResults = getMapParam(queryMap, SEARCH_RESULTS);
- String startPage = getMapParam(queryMap, START_PAGE);
+ String numResults = AccessPoint.getMapParam(queryMap, SEARCH_RESULTS);
+ String startPage = AccessPoint.getMapParam(queryMap, START_PAGE);
if (numResults != null) {
int nr = Integer.parseInt(numResults);
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -33,7 +33,7 @@
/**
* Subclass of RequestParser that acquires key request information from the
- * path component following the wayback context.
+ * path component within the handling AccessPoint.
*
* @author brad
* @version $Date$, $Revision$
@@ -41,43 +41,35 @@
public abstract class PathRequestParser extends WrappedRequestParser {
/**
- * @param wrapped
+ * @param wrapped the BaseRequestParser being wrapped
*/
public PathRequestParser(BaseRequestParser wrapped) {
super(wrapped);
}
/**
- * @param requestPath
- * @param acessPoint
- * @return WaybackRequest with information parsed from the requestPath, or
- * null if information could not be extracted.
- * @throws BetterRequestException
- */
+ * Attempt to transform an incoming HttpServletRequest into a
+ * WaybackRequest object. Returns null if there is missing information.
+ *
+ * @param requestPath the AccessPoint relative path as received by the
+ * AccessPoint
+ * @param accessPoint AccessPoint which is attempting to parse the request
+ * @return populated WaybackRequest object if successful, null otherwise.
+ * @throws BadQueryException if the request could match this AccessPoint,
+ * but is malformed: invalid datespec, URL, or flags
+ * @throws BetterRequestException if the request should be redirected to
+ * provide better user feedback (corrected URL/date in address bar)
+ */
public abstract WaybackRequest parse(String requestPath,
- AccessPoint acessPoint) throws BetterRequestException;
+ AccessPoint accessPoint) throws BetterRequestException,
+ BadQueryException;
- /* (non-Javadoc)
- * @see org.archive.wayback.requestparser.BaseRequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext)
- */
- @Override
public WaybackRequest parse(HttpServletRequest httpRequest,
- AccessPoint acessPoint)
+ AccessPoint accessPoint)
throws BadQueryException, BetterRequestException {
- String queryString = httpRequest.getQueryString();
- String origRequestPath = httpRequest.getRequestURI();
-
- if (queryString != null) {
- origRequestPath += "?" + queryString;
- }
- String contextPath = acessPoint.getContextPath(httpRequest);
- if (!origRequestPath.startsWith(contextPath)) {
- return null;
- }
- String requestPath = origRequestPath.substring(contextPath.length());
-
- WaybackRequest wbRequest = parse(requestPath, acessPoint);
+ String requestPath = accessPoint.translateRequestPathQuery(httpRequest);
+ WaybackRequest wbRequest = parse(requestPath, accessPoint);
if(wbRequest != null) {
wbRequest.setResultsPerPage(getMaxRecords());
}
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -45,7 +45,7 @@
import org.archive.util.anvl.ANVLRecord;
import org.archive.wayback.util.http.HttpRequestMessage;
import org.archive.wayback.util.http.HttpResponse;
-import org.archive.wayback.webapp.ServletRequestContext;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
/**
* ServletRequestContext interface which uses a ResourceFileLocationDB to
@@ -56,7 +56,7 @@
* @author brad
* @version $Date$, $Revision$
*/
-public class FileProxyServlet extends ServletRequestContext {
+public class FileProxyServlet extends AbstractRequestHandler {
private static final Logger LOGGER = Logger.getLogger(FileProxyServlet.class
.getName());
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -34,7 +34,8 @@
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
-import org.archive.wayback.webapp.ServletRequestContext;
+import org.archive.wayback.exception.BadQueryException;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
/**
* ServletRequestContext enabling remote HTTP GET/POST access to a local
@@ -44,7 +45,7 @@
* @author brad
* @version $Date$, $Revision$
*/
-public class ResourceFileLocationDBServlet extends ServletRequestContext {
+public class ResourceFileLocationDBServlet extends AbstractRequestHandler {
protected static final String OPERATION_ARGUMENT = "operation";
protected static final String NAME_ARGUMENT = "name";
@@ -77,18 +78,22 @@
e.printStackTrace();
httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST,
e.getMessage());
+ } catch(BadQueryException e) {
+ e.printStackTrace();
+ httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST,
+ e.getMessage());
}
return true;
}
private String handleOperation(Map<String,String[]> queryMap)
- throws ParseException {
+ throws ParseException, BadQueryException {
- String operation = getRequiredMapParam(queryMap, OPERATION_ARGUMENT);
+ String operation = AbstractRequestHandler.getRequiredMapParam(queryMap, OPERATION_ARGUMENT);
String message;
try {
if (operation.equals(LOOKUP_OPERATION)) {
- String name = getRequiredMapParam(queryMap, NAME_ARGUMENT);
+ String name = AbstractRequestHandler.getRequiredMapParam(queryMap, NAME_ARGUMENT);
message = NO_LOCATION_PREFIX + " " + name;
String arcUrls[] = locationDB.nameToUrls(name);
@@ -107,8 +112,8 @@
} else if (operation.equals(GETRANGE_OPERATION)) {
- long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT));
- long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT));
+ long start = Long.parseLong(AbstractRequestHandler.getRequiredMapParam(queryMap, START_ARGUMENT));
+ long end = Long.parseLong(AbstractRequestHandler.getRequiredMapParam(queryMap, END_ARGUMENT));
Iterator<String> itr = locationDB.getNamesBetweenMarks(start,end);
StringBuilder str = new StringBuilder();
str.append("OK ");
@@ -120,8 +125,8 @@
} else {
- String name = getRequiredMapParam(queryMap, NAME_ARGUMENT);
- String url = getRequiredMapParam(queryMap, URL_ARGUMENT);
+ String name = AbstractRequestHandler.getRequiredMapParam(queryMap, NAME_ARGUMENT);
+ String url = AbstractRequestHandler.getRequiredMapParam(queryMap, URL_ARGUMENT);
if (operation.equals(ADD_OPERATION)) {
locationDB.addNameUrl(name, url);
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -40,7 +40,6 @@
import org.archive.wayback.ReplayRenderer;
import org.archive.wayback.RequestParser;
import org.archive.wayback.ResultURIConverter;
-import org.archive.wayback.WaybackConstants;
import org.archive.wayback.accesscontrol.ExclusionFilterFactory;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
@@ -58,7 +57,8 @@
import org.archive.wayback.exception.WaybackException;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
import org.archive.wayback.util.operator.BooleanOperator;
-import org.springframework.beans.factory.BeanNameAware;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
+import org.archive.wayback.util.webapp.ShutdownListener;
/**
* Retains all information about a particular Wayback configuration
@@ -77,265 +77,38 @@
* @author brad
* @version $Date$, $Revision$
*/
-public class AccessPoint implements RequestContext, BeanNameAware {
+public class AccessPoint extends AbstractRequestHandler
+implements ShutdownListener {
private static final Logger LOGGER = Logger.getLogger(
AccessPoint.class.getName());
- private String liveWebPrefix = null;
-
- private boolean useServerName = false;
- private boolean useAnchorWindow = false;
- private boolean exactSchemeMatch = true;
private boolean exactHostMatch = false;
+ private boolean exactSchemeMatch = true;
+ private boolean useAnchorWindow = false;
+ private boolean useServerName = false;
- private int contextPort = 0;
- private String contextName = null;
- private String beanName = null;
- private WaybackCollection collection = null;
- private ReplayDispatcher replay = null;
- private ExceptionRenderer exception = new BaseExceptionRenderer();
- private QueryRenderer query = null;
- private RequestParser parser = null;
- private ResultURIConverter uriConverter = null;
- private Properties configs = null;
- private ExclusionFilterFactory exclusionFactory = null;
- private BooleanOperator<WaybackRequest> authentication = null;
+ private String liveWebPrefix = null;
private String urlRoot = null;
+
private Locale locale = null;
+
+ private Properties configs = null;
+
private List<String> filePatterns = null;
private List<String> filePrefixes = null;
-
- /**
- * @return List of file patterns that will be matched when querying the
- * ResourceIndex
- */
- public List<String> getFilePatterns() {
- return filePatterns;
- }
- /**
- * @param filePatterns List of file Patterns (regular expressions) that
- * will be matched when querying the ResourceIndex - only SearchResults
- * matching one of these patterns will be returned.
- */
- public void setFilePatterns(List<String> filePatterns) {
- this.filePatterns = filePatterns;
- }
+ private WaybackCollection collection = null;
+ private ExceptionRenderer exception = new BaseExceptionRenderer();
+ private QueryRenderer query = null;
+ private RequestParser parser = null;
+ private ReplayDispatcher replay = null;
+ private ResultURIConverter uriConverter = null;
- /**
- * @return List of file String prefixes that will be matched when querying
- * the ResourceIndex
- */
- public List<String> getFilePrefixes() {
- return filePrefixes;
- }
-
- /**
- * @param filePrefixes List of String file prefixes that will be matched
- * when querying the ResourceIndex - only SearchResults from files
- * with a prefix matching one of those in this List will be returned.
- */
- public void setFilePrefixes(List<String> filePrefixes) {
- this.filePrefixes = filePrefixes;
- }
-
- /**
- * @return the contextName
- */
- public String getContextName() {
- return contextName;
- }
-
- /**
- * @return the replay
- */
- public ReplayDispatcher getReplay() {
- return replay;
- }
-
- /**
- * @return the query
- */
- public QueryRenderer getQuery() {
- return query;
- }
-
- /**
- * @return the parser
- */
- public RequestParser getParser() {
- return parser;
- }
-
- /**
- * @return the uriConverter
- */
- public ResultURIConverter getUriConverter() {
- return uriConverter;
- }
-
- /**
- * @return explicit Locale to use within this AccessPoint.
- */
- public Locale getLocale() {
- return locale;
- }
-
- /**
- * @param locale explicit Locale to use for requests within this
- * AccessPoint. If not set, will attempt to use the one specified by
- * each requests User Agent via HTTP headers
- */
- public void setLocale(Locale locale) {
- this.locale = locale;
- }
-
- /**
- *
- */
- public AccessPoint() {
-
- }
+ private ExclusionFilterFactory exclusionFactory = null;
+ private BooleanOperator<WaybackRequest> authentication = null;
- /* (non-Javadoc)
- * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String)
- */
- public void setBeanName(String beanName) {
- this.beanName = beanName;
- this.contextName = "";
- int idx = beanName.indexOf(":");
- if(idx > -1) {
- contextPort = Integer.valueOf(beanName.substring(0,idx));
- contextName = beanName.substring(idx + 1);
- } else {
- try {
- this.contextPort = Integer.valueOf(beanName);
- } catch(NumberFormatException e) {
- e.printStackTrace();
- }
- }
- }
- /**
- * @return the name of the bean in the Spring configuration which defined
- * this AccessPoint.
- */
- public String getBeanName() {
- return beanName;
- }
- /**
- * @param httpRequest HttpServletRequest which is being handled
- * @return the prefix of paths received by this server that are handled by
- * this WaybackContext, including the trailing '/'
- */
- public String getContextPath(HttpServletRequest httpRequest) {
- String httpContextPath = httpRequest.getContextPath();
- if(contextName.length() == 0) {
- return httpContextPath + "/";
- }
- return httpContextPath + "/" + contextName + "/";
- }
- /**
- * Remove any leading ServletContext and AccessPoint name path elements
- * from the incoming request path, returning the result as a String
- *
- * @param httpRequest HttpServletRequest which is being handled
- * @param includeQuery if true, include any query arguments
- * @return the portion of the request following the path to this context
- * without leading '/'
- */
- protected String translateRequest(HttpServletRequest httpRequest,
- boolean includeQuery) {
-
- String origRequestPath = httpRequest.getRequestURI();
- if(includeQuery) {
- String queryString = httpRequest.getQueryString();
- if (queryString != null) {
- origRequestPath += "?" + queryString;
- }
- }
- String contextPath = getContextPath(httpRequest);
- if (!origRequestPath.startsWith(contextPath)) {
- if(contextPath.startsWith(origRequestPath)) {
- // missing trailing '/', just omit:
- return "";
- }
- return null;
- }
- return origRequestPath.substring(contextPath.length());
- }
-
- /**
- * Remove any leading ServletContext and AccessPoint name path elements
- * from the incoming request path, returning the result as a String
-
- * @param httpRequest HttpServletRequest which is being handled
- * @return the portion of the request following the path to this context,
- * including any query information,without leading '/'
- */
- public String translateRequestPathQuery(HttpServletRequest httpRequest) {
- return translateRequest(httpRequest,true);
- }
-
- /**
- * @param httpRequest HttpServletRequest which is being handled
- * @return the portion of the request following the path to this context,
- * excluding any query information, without leading '/'
- */
- public String translateRequestPath(HttpServletRequest httpRequest) {
- return translateRequest(httpRequest,false);
- }
-
- /**
- * Construct an absolute URL that points to the root of the context that
- * received the request, including a trailing "/".
- *
- * @return String absolute URL pointing to the Context root where the
- * request was received.
- */
- private String getAbsoluteContextPrefix(HttpServletRequest httpRequest,
- boolean useRequestServer) {
-
- StringBuilder prefix = new StringBuilder();
- prefix.append(WaybackConstants.HTTP_URL_PREFIX);
- String waybackPort = null;
- if(useRequestServer) {
- prefix.append(httpRequest.getLocalName());
- waybackPort = String.valueOf(httpRequest.getLocalPort());
- } else {
- prefix.append(httpRequest.getServerName());
- waybackPort = String.valueOf(httpRequest.getServerPort());
- }
- if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) {
- prefix.append(":").append(waybackPort);
- }
- String contextPath = getContextPath(httpRequest);
- prefix.append(contextPath);
- return prefix.toString();
- }
-
- /**
- * @param httpRequest HttpServletRequest which is being handled
- * @return absolute URL pointing to the base of this WaybackContext, using
- * Server and port information from the HttpServletRequest argument.
- */
- public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) {
- return getAbsoluteContextPrefix(httpRequest, true);
- }
-
- /**
- * @param httpRequest HttpServletRequest which is being handled
- * @return absolute URL pointing to the base of this WaybackContext, using
- * Canonical server and port information.
- */
- public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) {
- if(urlRoot != null) {
- return urlRoot;
- }
- return getAbsoluteContextPrefix(httpRequest, useServerName);
- }
-
protected boolean dispatchLocal(HttpServletRequest httpRequest,
HttpServletResponse httpResponse)
throws ServletException, IOException {
@@ -343,7 +116,7 @@
String translated = "/" + translateRequestPathQuery(httpRequest);
WaybackRequest wbRequest = new WaybackRequest();
- wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest));
+ wbRequest.setContextPrefix(getUrlRoot());
wbRequest.setAccessPoint(this);
wbRequest.fixup(httpRequest);
UIResults uiResults = new UIResults(wbRequest,uriConverter);
@@ -355,7 +128,7 @@
}
return false;
}
-
+
/**
* @param httpRequest HttpServletRequest which is being handled
* @param httpResponse HttpServletResponse which is being handled
@@ -371,25 +144,28 @@
boolean handled = false;
try {
- wbRequest = parser.parse(httpRequest, this);
+ wbRequest = getParser().parse(httpRequest, this);
if(wbRequest != null) {
handled = true;
// TODO: refactor this code into RequestParser implementations
wbRequest.setAccessPoint(this);
- wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest));
+// wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest));
+ wbRequest.setContextPrefix(getUrlRoot());
wbRequest.fixup(httpRequest);
// end of refactor
- if(authentication != null) {
- if(!authentication.isTrue(wbRequest)) {
- throw new AuthenticationControlException("Not authorized");
+ if(getAuthentication() != null) {
+ if(!getAuthentication().isTrue(wbRequest)) {
+ throw new AuthenticationControlException(
+ "Unauthorized");
}
}
- if(exclusionFactory != null) {
- ExclusionFilter exclusionFilter = exclusionFactory.get();
+ if(getExclusionFactory() != null) {
+ ExclusionFilter exclusionFilter =
+ getExclusionFactory().get();
if(exclusionFilter == null) {
throw new AdministrativeAccessControlException(
"AccessControl list unavailable");
@@ -399,8 +175,8 @@
// TODO: refactor this into RequestParser implementations, so a
// user could alter requests to change the behavior within a
// single AccessPoint. For now, this is a simple way to expose
- // the feature to configuration.
- wbRequest.setExactScheme(exactSchemeMatch);
+ // the feature to configuration.
+ wbRequest.setExactScheme(isExactSchemeMatch());
if(wbRequest.isReplayRequest()) {
@@ -408,7 +184,7 @@
} else {
- wbRequest.setExactHost(exactHostMatch);
+ wbRequest.setExactHost(isExactHostMatch());
handleQuery(wbRequest,httpRequest,httpResponse);
}
} else {
@@ -422,20 +198,34 @@
} catch(WaybackException e) {
boolean drawError = true;
if(e instanceof ResourceNotInArchiveException) {
- if(liveWebPrefix != null) {
- String liveUrl = liveWebPrefix + wbRequest.getRequestUrl();
+ if(getLiveWebPrefix() != null) {
+ String liveUrl =
+ getLiveWebPrefix() + wbRequest.getRequestUrl();
httpResponse.sendRedirect(liveUrl);
drawError = false;
}
}
if(drawError) {
logNotInArchive(e,wbRequest);
- exception.renderException(httpRequest, httpResponse, wbRequest, e,
- uriConverter);
+ getException().renderException(httpRequest, httpResponse,
+ wbRequest, e, getUriConverter());
}
}
return handled;
}
+
+ private void logNotInArchive(WaybackException e, WaybackRequest r) {
+ // TODO: move this into ResourceNotInArchiveException constructor
+ if(e instanceof ResourceNotInArchiveException) {
+ String url = r.getRequestUrl();
+ StringBuilder sb = new StringBuilder(100);
+ sb.append("NotInArchive\t");
+ sb.append(getUrlRoot()).append("\t");
+ sb.append(url);
+
+ LOGGER.info(sb.toString());
+ }
+ }
private void handleReplay(WaybackRequest wbRequest,
HttpServletRequest httpRequest, HttpServletResponse httpResponse)
@@ -443,24 +233,31 @@
Resource resource = null;
try {
PerformanceLogger p = new PerformanceLogger("replay");
- SearchResults results = collection.getResourceIndex().query(wbRequest);
+ SearchResults results =
+ getCollection().getResourceIndex().query(wbRequest);
p.queried();
if(!(results instanceof CaptureSearchResults)) {
throw new ResourceNotAvailableException("Bad results...");
}
- CaptureSearchResults captureResults = (CaptureSearchResults) results;
+ CaptureSearchResults captureResults =
+ (CaptureSearchResults) results;
// TODO: check which versions are actually accessible right now?
CaptureSearchResult closest = captureResults.getClosest(wbRequest,
- useAnchorWindow);
+ isUseAnchorWindow());
closest.setClosest(true);
- resource = collection.getResourceStore().retrieveResource(closest);
+ resource =
+ getCollection().getResourceStore().retrieveResource(closest);
p.retrieved();
- ReplayRenderer renderer = replay.getRenderer(wbRequest, closest, resource);
+ ReplayRenderer renderer =
+ getReplay().getRenderer(wbRequest, closest, resource);
+
renderer.renderResource(httpRequest, httpResponse, wbRequest,
- closest, resource, uriConverter, captureResults);
+ closest, resource, getUriConverter(), captureResults);
+
p.rendered();
- p.write(wbRequest.getReplayTimestamp() + " " + wbRequest.getRequestUrl());
+ p.write(wbRequest.getReplayTimestamp() + " " +
+ wbRequest.getRequestUrl());
} finally {
if(resource != null) {
resource.close();
@@ -473,18 +270,19 @@
throws ServletException, IOException, WaybackException {
PerformanceLogger p = new PerformanceLogger("query");
- SearchResults results = collection.getResourceIndex().query(wbRequest);
+ SearchResults results =
+ getCollection().getResourceIndex().query(wbRequest);
p.queried();
if(results instanceof CaptureSearchResults) {
CaptureSearchResults cResults = (CaptureSearchResults) results;
cResults.markClosest(wbRequest);
- query.renderCaptureResults(httpRequest,httpResponse,wbRequest,
- cResults,uriConverter);
+ getQuery().renderCaptureResults(httpRequest,httpResponse,wbRequest,
+ cResults,getUriConverter());
} else if(results instanceof UrlSearchResults) {
UrlSearchResults uResults = (UrlSearchResults) results;
- query.renderUrlResults(httpRequest,httpResponse,wbRequest,
- uResults,uriConverter);
+ getQuery().renderUrlResults(httpRequest,httpResponse,wbRequest,
+ uResults,getUriConverter());
} else {
throw new WaybackException("Unknown index format");
}
@@ -492,172 +290,199 @@
p.write(wbRequest.getRequestUrl());
}
+
/**
* Release any resources associated with this AccessPoint, including
* stopping any background processing threads
- *
- * @throws IOException per usual
*/
- public void shutdown() throws IOException {
+ public void shutdown() {
if(collection != null) {
- collection.shutdown();
+ try {
+ collection.shutdown();
+ } catch (IOException e) {
+ LOGGER.error("FAILED collection shutdown", e);
+ }
}
if(exclusionFactory != null) {
exclusionFactory.shutdown();
}
}
- private void logNotInArchive(WaybackException e, WaybackRequest r) {
- // TODO: move this into ResourceNotInArchiveException constructor
- if(e instanceof ResourceNotInArchiveException) {
- String url = r.getRequestUrl();
- StringBuilder sb = new StringBuilder(100);
- sb.append("NotInArchive\t");
- sb.append(contextName).append("\t");
- sb.append(contextPort).append("\t");
- sb.append(url);
-
- LOGGER.info(sb.toString());
- }
+ /*
+ * *******************************************************************
+ * *******************************************************************
+ *
+ * ALL GETTER/SETTER BELOW HERE
+ *
+ * *******************************************************************
+ * *******************************************************************
+ */
+
+ /**
+ * @return the exactHostMatch
+ */
+ public boolean isExactHostMatch() {
+ return exactHostMatch;
}
/**
- * @param contextPort the contextPort to set
+ * @param exactHostMatch if true, then only SearchResults exactly matching
+ * the requested hostname will be returned from this AccessPoint. If
+ * false, then hosts which canonicalize to the same host as requested
+ * hostname will be returned (www.)
*/
- public void setContextPort(int contextPort) {
- this.contextPort = contextPort;
+ public void setExactHostMatch(boolean exactHostMatch) {
+ this.exactHostMatch = exactHostMatch;
}
/**
- * @param contextName the contextName to set
+ * @return the exactSchemeMatch
*/
- public void setContextName(String contextName) {
- this.contextName = contextName;
+ public boolean isExactSchemeMatch() {
+ return exactSchemeMatch;
}
/**
- * @param replay the replay to set
+ * @param exactSchemeMatch the exactSchemeMatch to set
*/
- public void setReplay(ReplayDispatcher replay) {
- this.replay = replay;
+ public void setExactSchemeMatch(boolean exactSchemeMatch) {
+ this.exactSchemeMatch = exactSchemeMatch;
}
/**
- * @param query the query to set
+ * @return true if this AccessPoint is configured to useAnchorWindow, that
+ * is, to replay documents only if they are within a certain proximity to
+ * the user's requested AnchorDate
*/
- public void setQuery(QueryRenderer query) {
- this.query = query;
+ public boolean isUseAnchorWindow() {
+ return useAnchorWindow;
}
/**
- * @param parser the parser to set
+ * @param useAnchorWindow when set to true, causes this AccessPoint to
+ * only replay documents if they are within a certain proximity to
+ * the user's requested AnchorDate
*/
- public void setParser(RequestParser parser) {
- this.parser = parser;
+ public void setUseAnchorWindow(boolean useAnchorWindow) {
+ this.useAnchorWindow = useAnchorWindow;
}
/**
- * @param uriConverter the uriConverter to set
+ * @return the useServerName
+ * @deprecated no longer used, use urlPrefix
*/
- public void setUriConverter(ResultURIConverter uriConverter) {
- this.uriConverter = uriConverter;
+ public boolean isUseServerName() {
+ return useServerName;
}
-
/**
- * @return the contextPort
+ * @param useServerName the useServerName to set
+ * @deprecated no longer used, use urlPrefix
*/
- public int getContextPort() {
- return contextPort;
+ public void setUseServerName(boolean useServerName) {
+ this.useServerName = useServerName;
}
/**
- * @return the configs
+ * @return the liveWebPrefix String to use, or null, if this AccessPoint
+ * does not use the Live Web to fill in documents missing from the archive
*/
- public Properties getConfigs() {
- return configs;
+ public String getLiveWebPrefix() {
+ return liveWebPrefix;
}
/**
- * @param configs the configs to set
+ * @param liveWebPrefix the String URL prefix to use to attempt to retrieve
+ * documents missing from the collection from the live web, on demand.
*/
- public void setConfigs(Properties configs) {
- this.configs = configs;
+ public void setLiveWebPrefix(String liveWebPrefix) {
+ this.liveWebPrefix = liveWebPrefix;
}
/**
- * @return the useServerName
+ * @return the String url prefix to use when generating self referencing
+ * URLs
*/
- public boolean isUseServerName() {
- return useServerName;
+ public String getUrlRoot() {
+ return urlRoot;
}
/**
- * @param useServerName the useServerName to set
+ * @param urlRoot explicit URL prefix to use when creating self referencing
+ * URLs
*/
- public void setUseServerName(boolean useServerName) {
- this.useServerName = useServerName;
+ public void setUrlRoot(String urlRoot) {
+ this.urlRoot = urlRoot;
}
/**
- * @return the useAnchorWindow
+ * @return explicit Locale to use within this AccessPoint.
*/
- public boolean isUseAnchorWindow() {
- return useAnchorWindow;
+ public Locale getLocale() {
+ return locale;
}
/**
- * @param useAnchorWindow the useAnchorWindow to set
+ * @param locale explicit Locale to use for requests within this
+ * AccessPoint. If not set, will attempt to use the one specified by
+ * each request's User Agent via HTTP headers
*/
- public void setUseAnchorWindow(boolean useAnchorWindow) {
- this.useAnchorWindow = useAnchorWindow;
+ public void setLocale(Locale locale) {
+ this.locale = locale;
}
-
+
/**
- * @return the exactSchemeMatch
+ * @return the generic customization Properties used with this AccessPoint,
+ * generally to tune the UI
*/
- public boolean isExactSchemeMatch() {
- return exactSchemeMatch;
+ public Properties getConfigs() {
+ return configs;
}
/**
- * @param exactSchemeMatch the exactSchemeMatch to set
+ * @param configs the generic customization Properties to use with this
+ * AccessPoint, generally used to tune the UI
*/
- public void setExactSchemeMatch(boolean exactSchemeMatch) {
- this.exactSchemeMatch = exactSchemeMatch;
+ public void setConfigs(Properties configs) {
+ this.configs = configs;
}
/**
- * @return the ExclusionFilterFactory in use with this AccessPoint
+ * @return List of file patterns that will be matched when querying the
+ * ResourceIndex
*/
- public ExclusionFilterFactory getExclusionFactory() {
- return exclusionFactory;
+ public List<String> getFilePatterns() {
+ return filePatterns;
}
/**
- * @param exclusionFactory all requests to this AccessPoint will create an
- * exclusionFilter from this factory when handling requests
+ * @param filePatterns List of file Patterns (regular expressions) that
+ * will be matched when querying the ResourceIndex - only SearchResults
+ * matching one of these patterns will be returned.
*/
- public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) {
- this.exclusionFactory = exclusionFactory;
+ public void setFilePatterns(List<String> filePatterns) {
+ this.filePatterns = filePatterns;
}
/**
- * @return the configured AuthenticationControl operator in use with this
- * AccessPoint.
+ * @return List of file String prefixes that will be matched when querying
+ * the ResourceIndex
*/
- public BooleanOperator<WaybackRequest> getAuthentication() {
- return authentication;
+ public List<String> getFilePrefixes() {
+ return filePrefixes;
}
/**
- * @param authentication the BooleanOperator which determines if incoming
- * requests are allowed to connect to this AccessPoint.
+ * @param filePrefixes List of String file prefixes that will be matched
+ * when querying the ResourceIndex - only SearchResults from files
+ * with a prefix matching one of those in this List will be returned.
*/
- public void setAuthentication(BooleanOperator<WaybackRequest> authentication) {
- this.authentication = authentication;
+ public void setFilePrefixes(List<String> filePrefixes) {
+ this.filePrefixes = filePrefixes;
}
+
+
/**
* @return the WaybackCollection used by this AccessPoint
*/
@@ -687,49 +512,97 @@
}
/**
- * @return the String url prefix to use when generating self referencing
- * URLs
+ * @return the QueryRenderer to use with this AccessPoint
*/
- public String getUrlRoot() {
- return urlRoot;
+ public QueryRenderer getQuery() {
+ return query;
}
+
+ /**
+ * @param query the QueryRenderer responsible for returning query data to
+ * clients.
+ */
+ public void setQuery(QueryRenderer query) {
+ this.query = query;
+ }
/**
- * @param urlRoot explicit URL prefix to use when creating self referencing
- * URLs
+ * @return the RequestParser used by this AccessPoint to attempt to
+ * translate incoming HttpServletRequest objects into WaybackRequest
+ * objects
*/
- public void setUrlRoot(String urlRoot) {
- this.urlRoot = urlRoot;
+ public RequestParser getParser() {
+ return parser;
}
+
+ /**
+ * @param parser the RequestParser to use with this AccessPoint
+ */
+ public void setParser(RequestParser parser) {
+ this.parser = parser;
+ }
/**
- * @return the exactHostMatch
+ * @return the ReplayDispatcher to use with this AccessPoint, responsible
+ * for returning an appropriate ReplayRenderer given the user request and
+ * the returned document type.
*/
- public boolean isExactHostMatch() {
- return exactHostMatch;
+ public ReplayDispatcher getReplay() {
+ return replay;
}
/**
- * @param exactHostMatch if true, then only SearchResults exactly matching
- * the requested hostname will be returned from this AccessPoint. If
- * false, then hosts which canonicalize to the same host as requested
- * hostname will be returned (www.)
+ * @param replay the ReplayDispatcher to use with this AccessPoint.
*/
- public void setExactHostMatch(boolean exactHostMatch) {
- this.exactHostMatch = exactHostMatch;
+ public void setReplay(ReplayDispatcher replay) {
+ this.replay = replay;
}
/**
- * @return the liveWebPrefix
+ * @return the ResultURIConverter used to construct Replay URLs within this
+ * AccessPoint
*/
- public String getLiveWebPrefix() {
- return liveWebPrefix;
+ public ResultURIConverter getUriConverter() {
+ return uriConverter;
}
/**
- * @param liveWebPrefix the liveWebPrefix to set
+ * @param uriConverter the ResultURIConverter to use with this AccessPoint
+ * to construct Replay URLs
*/
- public void setLiveWebPrefix(String liveWebPrefix) {
- this.liveWebPrefix = liveWebPrefix;
+ public void setUriConverter(ResultURIConverter uriConverter) {
+ this.uriConverter = uriConverter;
}
+
+
+ /**
+ * @return the ExclusionFilterFactory in use with this AccessPoint
+ */
+ public ExclusionFilterFactory getExclusionFactory() {
+ return exclusionFactory;
+ }
+
+ /**
+ * @param exclusionFactory all requests to this AccessPoint will create an
+ * exclusionFilter from this factory when handling requests
+ */
+ public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) {
+ this.exclusionFactory = exclusionFactory;
+ }
+
+ /**
+ * @return the configured AuthenticationControl BooleanOperator in use with
+ * this AccessPoint.
+ */
+ public BooleanOperator<WaybackRequest> getAuthentication() {
+ return authentication;
+ }
+
+ /**
+ * @param auth the BooleanOperator which determines if incoming
+ * requests are allowed to connect to this AccessPoint.
+ */
+ public void setAuthentication(BooleanOperator<WaybackRequest> auth) {
+ this.authentication = auth;
+ }
}
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2010-04-27 22:45:40 UTC (rev 3080)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2010-04-27 22:47:37 UTC (rev 3081)
@@ -45,84 +45,34 @@
import org.archive.wayback.liveweb.LiveWebCache;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
import org.archive.wayback.resourcestore.resourcefile.ArcResource;
-import org.springframework.beans.factory.BeanNameAware;
+import org.archive.wayback.util.webapp.AbstractRequestHandler;
/**
* @author brad
*
- * AccessPoint subclass which allows no Queries, but makes all replay requests
- * through a LiveWebCache
+ * RequestHandler which satisfies all incoming requests through a LiveWebCache,
+ * using an internal AccessPoint to rewrite replayed documents.
*
*/
-public class LiveWebAccessPoint extends ServletRequestContext implements BeanNameAware {
+public class LiveWebAccessPoint extends AbstractRequestHandler {
private AccessPoint inner = null;
private LiveWebCache cache = null;
private RobotExclusionFilterFactory robotFactory = null;
private long maxCacheMS = 86400000;
- private String beanName = null;
- private int contextPort = 0;
- private String contextName = null;
- public void setBeanName(String beanName) {
- this.beanName = beanName;
- this.contextName = "";
- int idx = beanName.indexOf(":");
- if(idx > -1) {
- contextPort = Integer.valueOf(beanName.substring(0,idx));
- contextName = beanName.substring(idx + 1);
- } else {
- try {
- this.contextPort = Integer.valueOf(beanName);
- } catch(NumberFormatException e) {
- e.printStackTrace();
- }
- }
- }
- /**
- * @param httpRequest HttpServletRequest which is being handled
- * @return the prefix of paths received by this server that are handled by
- * this WaybackContext, including the trailing '/'
- */
- public String getContextPath(HttpServletRequest httpRequest) {
- String httpContextPath = httpRequest.getContextPath();
- if(contextName.length() == 0) {
- return httpContextPath + "/";
- }
- return httpContextPath + "/" + contextName + "/";
- }
-
-
- protected String translateRequest(HttpServletRequest httpRequest,
- boolean includeQuery) {
-
- String origRequestPath = httpRequest.getRequestURI();
- if(includeQuery) {
- String queryString = httpRequest.getQueryString();
- if (queryString != null) {
- origRequestPath += "?" + queryString;
- }
- }
- String contextPath = getContextPath(httpRequest);
- if (!origRequestPath.startsWith(contextPath)) {
- if(contextPath.startsWith(origRequestPath)) {
- // missing trailing '/', just omit:
- return "";
- }
- return null;
- }
- return origRequestPath.substring(contextPath.length());
- }
-
public boolean handleRequest(HttpServletRequest httpRequest,
HttpServletResponse httpResponse)
throws ServletException, IOException {
- String urlString = translateRequest(httpRequest,true);
+ String urlString = translateRequestPathQuery(httpRequest);
+
boolean handled = true;
WaybackRequest wbRequest = new WaybackRequest();
wbRequest.setAccessPoint(inner);
- wbRequest.setContextPrefix(inner.getAbsoluteServerPrefix(httpRequest));
- wbRequest.setServerPrefix(inner.getAbsoluteServerPrefix(httpRequest));
+
+ wbRequest.setContextPrefix(inner.getUrlRoot());
+ wbRequest.setServerPrefix(inner.getUrlRoot());
+
wbRequest.setLiveWebRequest(true);
wbRequest.setRequestUrl(urlString);
URL url = null;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|