You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
From: <bra...@us...> - 2010-05-18 22:54:16
|
Revision: 3105 http://archive-access.svn.sourceforge.net/archive-access/?rev=3105&view=rev Author: bradtofel Date: 2010-05-18 22:54:10 +0000 (Tue, 18 May 2010) Log Message: ----------- INITIAL REV: Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java 2010-05-18 22:54:10 UTC (rev 3105) @@ -0,0 +1,79 @@ +/* JSPReplayRenderer + * + * $Id$: + * + * Created on May 7, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.replay; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ReplayRenderer; +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.WaybackException; + +/** + * ReplayRenderer implementation which just forwards responsibility for + * rendering a resource to a .jsp file. + * + * @author brad + * + */ +public class JSPReplayRenderer implements ReplayRenderer { + private String targetJsp = null; + + public void renderResource(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + CaptureSearchResult result, Resource resource, + ResultURIConverter uriConverter, CaptureSearchResults results) + throws ServletException, IOException, WaybackException { + UIResults uiResults = + new UIResults(wbRequest, uriConverter, results, result, resource); + uiResults.forward(httpRequest, httpResponse, targetJsp); + } + + /** + * @return the context-relative path to the .jsp responsible for rendering + * the resource + */ + public String getTargetJsp() { + return targetJsp; + } + + /** + * @param targetJsp the context-relative path to the .jsp responsible for + * rendering the resource + */ + public void setTargetJsp(String targetJsp) { + this.targetJsp = targetJsp; + } + +} Property changes on: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/JSPReplayRenderer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-18 22:46:13
|
Revision: 3104 http://archive-access.svn.sourceforge.net/archive-access/?rev=3104&view=rev Author: bradtofel Date: 2010-05-18 22:46:07 +0000 (Tue, 18 May 2010) Log Message: ----------- Translate escaped characters within resolved urls prior to contextualizing. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-18 22:44:22 UTC (rev 3103) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-18 22:46:07 UTC (rev 3104) @@ -31,6 +31,7 @@ import org.apache.commons.httpclient.URIException; import org.archive.net.UURI; import org.archive.net.UURIFactory; +import org.htmlparser.util.Translate; /** * Class which tracks the context and state involved with parsing an HTML @@ -93,6 +94,7 @@ * @throws URISyntaxException if the input URL is malformed */ public String resolve(String url) throws URISyntaxException { + url = Translate.decode(url); int hashIdx = url.indexOf('#'); String frag = ""; if(hashIdx != -1) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-18 22:44:29
|
Revision: 3103 http://archive-access.svn.sourceforge.net/archive-access/?rev=3103&view=rev Author: bradtofel Date: 2010-05-18 22:44:22 +0000 (Tue, 18 May 2010) Log Message: ----------- Experimental: changes to make a SearchResultSource that can be directly indexed by ordinal position. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -0,0 +1,40 @@ +/* 
SequencedSearchResultSource + * + * $Id$: + * + * Created on May 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.exception.ResourceIndexNotAvailableException; +import org.archive.wayback.util.CloseableIterator; + +/** + * @author brad + * + */ +public interface SequencedSearchResultSource extends SearchResultSource { + public CloseableIterator<CaptureSearchResult> + getPrefixIterator(final String prefix, int startIdx) + throws ResourceIndexNotAvailableException; +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/SequencedSearchResultSource.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -0,0 +1,63 @@ +/* SkippingStringPrefixIterator + * + * $Id$: + * + * Created on May 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.ziplines; + +import java.util.Iterator; + +/** + * @author brad + * + */ +public class SkippingStringPrefixIterator extends StringPrefixIterator { + private long skipCount = 0; + private long totalMatches = -1; + + public SkippingStringPrefixIterator(Iterator<String> inner, String prefix, + long skipCount) { + super(inner,prefix); + this.skipCount = skipCount; + } + public SkippingStringPrefixIterator(Iterator<String> inner, String prefix) { + super(inner,prefix); + } + public long getTotalMatches() { + return totalMatches; + } + public void setTotalMatches(long totalMatches) { + this.totalMatches = totalMatches; + } + public boolean hasNext() { + while(skipCount > 0) { + if(super.hasNext()) { + next(); + skipCount--; + } else { + return false; + } + } + return super.hasNext(); + } +} Property changes on: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/SkippingStringPrefixIterator.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java 2010-05-18 22:38:59 UTC (rev 3102) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/StringPrefixIterator.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -47,6 +47,9 @@ truncated = ((ZiplinesChunkIterator)inner).isTruncated(); } } + public long getTotalMatches() { + return 0 ; + } public boolean isTruncated() { return truncated; } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -0,0 +1,141 @@ +/* ZiplineBlockMatches + * + * $Id$: + * + * Created on May 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.ziplines; + +import java.io.BufferedReader; +import java.io.IOException; +import java.util.ArrayList; + +/** + * @author brad + * + */ +public class ZiplineBlockMatches { + private ArrayList<ZiplinedBlock> blocks = null; + private String prefix = null; + private int cachedFirstCount = -1; + private int cachedLastCount = -1; + public ZiplineBlockMatches(ArrayList<ZiplinedBlock> blocks, String prefix) { + this.blocks = blocks; + this.prefix = prefix; + cachedFirstCount = -1; + cachedLastCount = -1; + } + + public StringPrefixIterator getIterator() { + ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks); + zci.setTruncated(false); + return new StringPrefixIterator(zci,prefix); + } + + public StringPrefixIterator getIteratorAt(long skip) throws IOException { + SkippingStringPrefixIterator itr = null; + ArrayList<ZiplinedBlock> matchingBlocked = + new ArrayList<ZiplinedBlock>(); + long total = getTotalMatching(); + if(skip > total) { + // TODO: should return empty itr... 
+ return null; + } + long firstBlockMatches = + countMatchesInStartBlock(blocks.get(0), prefix); + if(skip < firstBlockMatches) { + ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks); + itr = new SkippingStringPrefixIterator(zci,prefix,skip); + itr.setTotalMatches(total); + return itr; + } + skip -= firstBlockMatches; + int size = blocks.size(); + for(int i = 1; i < size; i++) { + ZiplinedBlock block = blocks.get(i); + if(block.count > skip) { + // this is the block to start: + ZiplinesChunkIterator zci = + new ZiplinesChunkIterator(blocks.subList(i, size)); + itr = new SkippingStringPrefixIterator(zci,prefix,skip); + itr.setTotalMatches(total); + return itr; + } + skip -= block.count; + } + // should never get here... + return null; + } + + public long getTotalMatching() throws IOException { + if(blocks == null) { + return 0; + } + int size = blocks.size(); + if(size == 0) { + return 0; + } + long count = countMatchesInStartBlock(blocks.get(0),prefix); + if(size == 1) { + return count; + } + for(int i = 1; i < size-1; i++) { + count += blocks.get(i).count; + } + count += countMatchesInLastBlock(blocks.get(size-1), prefix); + return count; + } + private long countMatchesInStartBlock(ZiplinedBlock block, String prefix) + throws IOException { + if(cachedFirstCount == -1) { + BufferedReader r = block.readBlock(); + int matches = block.count; + while(true) { + String nextLine = r.readLine(); + if((nextLine == null) || nextLine.startsWith(prefix)) { + r.close(); + cachedFirstCount = matches; + break; + } + matches--; + } + } + return cachedFirstCount; + } + private long countMatchesInLastBlock(ZiplinedBlock block, String prefix) + throws IOException { + if(cachedLastCount == -1) { + BufferedReader r = block.readBlock(); + int matches = 0; + while(true) { + String nextLine = r.readLine(); + if((nextLine == null) || !nextLine.startsWith(prefix)) { + r.close(); + cachedLastCount = matches; + break; + } + matches++; + } + } + return cachedLastCount; + } +} 
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplineBlockMatches.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-05-18 22:38:59 UTC (rev 3102) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlock.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -44,6 +44,7 @@ String urlOrPath = null; long offset = -1; + int count = 0; public final static int BLOCK_SIZE = 128 * 1024; private final static String RANGE_HEADER = "Range"; private final static String BYTES_HEADER = "bytes="; @@ -53,8 +54,17 @@ * @param offset start of 128K block boundary. */ public ZiplinedBlock(String urlOrPath, long offset) { + this(urlOrPath,offset,0); + } + /** + * @param urlOrPath URL where this file can be downloaded + * @param offset start of 128K block boundary. 
+ * @param count number of records in this block + */ + public ZiplinedBlock(String urlOrPath, long offset, int count) { this.urlOrPath = urlOrPath; this.offset = offset; + this.count = count; } /** * @return a BufferedReader of the underlying compressed data in this block Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -0,0 +1,107 @@ +/* ZiplinedBlockIndex + * + * $Id$: + * + * Created on May 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.ziplines; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +import org.archive.wayback.exception.ResourceIndexNotAvailableException; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.flatfile.FlatFile; + +/** + * @author brad + * + */ +public class ZiplinedBlockStringSequence { + private FlatFile chunkIndex = null; + private HashMap<String,String> chunkMap = null; + private int maxBlocks = 10000; + + public ZiplinedBlockStringSequence(FlatFile chunkIndex, + HashMap<String,String> chunkMap) { + this.chunkIndex = chunkIndex; + this.chunkMap = chunkMap; + } + + private ZiplineBlockMatches getBlockMatches(String prefix) + throws IOException, ResourceIndexNotAvailableException { + ArrayList<ZiplinedBlock> blocks = new ArrayList<ZiplinedBlock>(); + boolean first = true; + int numBlocks = 0; + boolean truncated = false; + CloseableIterator<String> itr = null; + try { + itr = chunkIndex.getRecordIteratorLT(prefix); + while(itr.hasNext()) { + if(numBlocks >= maxBlocks) { + truncated = true; + break; + } + String blockDescriptor = itr.next(); + numBlocks++; + String parts[] = blockDescriptor.split("\t"); + if(parts.length != 4) { + throw new ResourceIndexNotAvailableException("Bad line(" + + blockDescriptor + ")"); + } + // only compare the correct length: + String prefCmp = prefix; + String blockCmp = parts[0]; + if(first) { + // always add first: + first = false; + } else if(!blockCmp.startsWith(prefCmp)) { + // all done; + break; + } + // add this and keep lookin... 
+ String url = chunkMap.get(parts[1]); + long offset = Long.parseLong(parts[2]); + int count = Integer.parseInt(parts[3]); + + blocks.add(new ZiplinedBlock(url, offset, count)); + } + } finally { + if(itr != null) { + itr.close(); + } + } + return new ZiplineBlockMatches(blocks,prefix); + } + + public StringPrefixIterator getIterator(String prefix, long skip) + throws ResourceIndexNotAvailableException, IOException { + ZiplineBlockMatches matches = getBlockMatches(prefix); + return matches.getIteratorAt(skip); + } + public StringPrefixIterator getIterator(String prefix) + throws ResourceIndexNotAvailableException, IOException { + ZiplineBlockMatches matches = getBlockMatches(prefix); + return matches.getIterator(); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequence.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -0,0 +1,85 @@ +/* ZiplinedBlockStringSequenceTest + * + * $Id$: + * + * Created on May 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.ziplines; + +import java.io.IOException; +import java.util.HashMap; + +import org.archive.wayback.exception.ResourceIndexNotAvailableException; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.flatfile.FlatFile; + +import junit.framework.TestCase; + +/** + * @author brad + * + */ +public class ZiplinedBlockStringSequenceTest extends TestCase { + private String indexPath = "/home/brad/os-cdx/CDX-201002-clean/ALL.count.summary"; + private String mapPath = "/home/brad/os-cdx/CDX-201002-clean/ALL.loc-workstation"; + + private ZiplinedBlockStringSequence getSequence() throws IOException { + HashMap<String, String> chunkMap = new HashMap<String, String>(); + FlatFile ff = new FlatFile(mapPath); + CloseableIterator<String> lines = ff.getSequentialIterator(); + while(lines.hasNext()) { + String line = lines.next(); + String[] parts = line.split("\\s"); + if(parts.length != 2) { + throw new IOException("Bad line(" + line +") in (" + + mapPath + ")"); + } + chunkMap.put(parts[0],parts[1]); + } + lines.close(); + FlatFile chunkIndex = new FlatFile(indexPath); + return new ZiplinedBlockStringSequence(chunkIndex, chunkMap); + } + /** + * Test method for {@link org.archive.wayback.resourceindex.ziplines.ZiplinedBlockStringSequence#getIterator(java.lang.String, long)}. 
+ * @throws IOException + * @throws ResourceIndexNotAvailableException + */ + public void testGetIteratorStringLong() throws IOException, ResourceIndexNotAvailableException { + ZiplinedBlockStringSequence seq = getSequence(); + StringPrefixIterator itr = seq.getIterator("yahoo.com/", 1000000); + System.out.format("Total Matches %d\n",itr.getTotalMatches()); + for(int i = 0; i < 10; i++) { + if(itr.hasNext()) { + System.out.format("Line(%d): %s\n",i,itr.next()); + } + } + } + + /** + * Test method for {@link org.archive.wayback.resourceindex.ziplines.ZiplinedBlockStringSequence#getIterator(java.lang.String)}. + */ + public void testGetIteratorString() { +// fail("Not yet implemented"); + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinedBlockStringSequenceTest.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-05-18 22:38:59 UTC (rev 3102) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/ziplines/ZiplinesSearchResultSource.java 2010-05-18 22:44:22 UTC (rev 3103) @@ -39,6 +39,7 @@ import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.exception.ResourceIndexNotAvailableException; import org.archive.wayback.resourceindex.SearchResultSource; +import org.archive.wayback.resourceindex.SequencedSearchResultSource; import org.archive.wayback.resourceindex.cdx.CDXFormatToSearchResultAdapter; import org.archive.wayback.resourceindex.cdx.format.CDXFormat; import 
org.archive.wayback.resourceindex.cdx.format.CDXFormatException; @@ -132,10 +133,9 @@ throw new ResourceIndexNotAvailableException(e.getMessage()); } } - - public Iterator<String> getStringPrefixIterator(String prefix) - throws ResourceIndexNotAvailableException, IOException { + private ArrayList<ZiplinedBlock> getBlockListForPrefix(String prefix) + throws IOException, ResourceIndexNotAvailableException { ArrayList<ZiplinedBlock> blocks = new ArrayList<ZiplinedBlock>(); boolean first = true; int numBlocks = 0; @@ -175,8 +175,15 @@ itr.close(); } } + return blocks; + } + + public Iterator<String> getStringPrefixIterator(String prefix) + throws ResourceIndexNotAvailableException, IOException { + + ArrayList<ZiplinedBlock> blocks = getBlockListForPrefix(prefix); ZiplinesChunkIterator zci = new ZiplinesChunkIterator(blocks); - zci.setTruncated(truncated); + zci.setTruncated(false); return new StringPrefixIterator(zci,prefix); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-18 22:39:06
|
Revision: 3102 http://archive-access.svn.sourceforge.net/archive-access/?rev=3102&view=rev Author: bradtofel Date: 2010-05-18 22:38:59 +0000 (Tue, 18 May 2010) Log Message: ----------- INTERFACE: made guessed charset HTTP header name public Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-05-17 19:52:24 UTC (rev 3101) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-05-18 22:38:59 UTC (rev 3102) @@ -50,7 +50,7 @@ */ public abstract class TextReplayRenderer implements ReplayRenderer { - private static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset"; + public static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset"; private String guessedCharsetHeader = GUESSED_CHARSET_HEADER; private List<String> jspInserts = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3101 http://archive-access.svn.sourceforge.net/archive-access/?rev=3101&view=rev Author: bradtofel Date: 2010-05-17 19:52:24 +0000 (Mon, 17 May 2010) Log Message: ----------- FEATURE: exposed addDefaults() to internal ProxyReplayRequestParser, to enable/disable the possibly slow getLocalHostnames lookup Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java 2010-05-17 19:50:11 UTC (rev 3100) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyArchivalRequestParser.java 2010-05-17 19:52:24 UTC (rev 3101) @@ -105,4 +105,17 @@ } return wbRequest; } + /** + * @return the addDefaults + */ + public boolean isAddDefaults() { + return prrp.isAddDefaults(); + } + + /** + * @param addDefaults the addDefaults to set + */ + public void setAddDefaults(boolean addDefaults) { + prrp.setAddDefaults(addDefaults); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3100 http://archive-access.svn.sourceforge.net/archive-access/?rev=3100&view=rev Author: bradtofel Date: 2010-05-17 19:50:11 +0000 (Mon, 17 May 2010) Log Message: ----------- INTERFACE: made makeFlagDateSpec() public, added javadoc, and also made identity flag get forwarded. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java 2010-05-17 19:48:36 UTC (rev 3099) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlDateRedirectReplayRenderer.java 2010-05-17 19:50:11 UTC (rev 3100) @@ -63,9 +63,19 @@ String betterURI = uriConverter.makeReplayURI(captureDate,url); httpResponse.sendRedirect(betterURI); } - private String makeFlagDateSpec(String dateSpec, WaybackRequest request) { + + /** + * Given a date, and a WaybackRequest object, create a new datespec + flags + * which represent the same options as requested by the WaybackRequest + * @param timestamp the 14-digit timestamp to use + * @param request the WaybackRequest from which to get extra request option + * flags + * @return a String representing the flags on the WaybackRequest for the + * specified date + */ + public static String makeFlagDateSpec(String timestamp, WaybackRequest request) { StringBuilder sb = new StringBuilder(); - sb.append(dateSpec); + sb.append(timestamp); if(request.isCSSContext()) { sb.append(ArchivalUrlRequestParser.CSS_CONTEXT); sb.append(ArchivalUrlRequestParser.FLAG_DELIM); @@ -78,6 +88,10 @@ sb.append(ArchivalUrlRequestParser.IMG_CONTEXT); 
sb.append(ArchivalUrlRequestParser.FLAG_DELIM); } + if(request.isIdentityContext()) { + sb.append(ArchivalUrlRequestParser.IDENTITY_CONTEXT); + sb.append(ArchivalUrlRequestParser.FLAG_DELIM); + } return sb.toString(); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3099 http://archive-access.svn.sourceforge.net/archive-access/?rev=3099&view=rev Author: bradtofel Date: 2010-05-17 19:48:36 +0000 (Mon, 17 May 2010) Log Message: ----------- FEATURE: now strips default ports from urls when rewriting as replay URLs. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java 2010-05-17 19:45:41 UTC (rev 3098) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlResultURIConverter.java 2010-05-17 19:48:36 UTC (rev 3099) @@ -25,6 +25,7 @@ package org.archive.wayback.archivalurl; import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.util.url.UrlOperations; /** * @@ -42,15 +43,24 @@ * @see org.archive.wayback.ResultURIConverter#makeReplayURI(java.lang.String, java.lang.String) */ public String makeReplayURI(String datespec, String url) { - String suffix = datespec + "/" + url; + StringBuilder sb = null; + if(replayURIPrefix == null) { - return suffix; - } else { - if(url.startsWith(replayURIPrefix)) { - return url; - } - return replayURIPrefix + suffix; + sb = new StringBuilder(url.length() + datespec.length()); + sb.append(datespec); + sb.append("/"); + sb.append(UrlOperations.stripDefaultPortFromUrl(url)); + return sb.toString(); } + if(url.startsWith(replayURIPrefix)) { + return url; + } + sb = new StringBuilder(url.length() + datespec.length()); + sb.append(replayURIPrefix); + sb.append(datespec); + sb.append("/"); + sb.append(UrlOperations.stripDefaultPortFromUrl(url)); + return sb.toString(); } /** 
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3098 http://archive-access.svn.sourceforge.net/archive-access/?rev=3098&view=rev Author: bradtofel Date: 2010-05-17 19:45:41 +0000 (Mon, 17 May 2010) Log Message: ----------- LOGGING Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2010-05-17 19:44:24 UTC (rev 3097) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/BeanNameRegistrar.java 2010-05-17 19:45:41 UTC (rev 3098) @@ -116,11 +116,14 @@ String name = handler.getBeanName(); if(name != null) { if(name.equals(RequestMapper.GLOBAL_PRE_REQUEST_HANDLER)) { - + LOGGER.info("Registering Global-pre request handler:" + + handler); mapper.addGlobalPreRequestHandler(handler); } else if(name.equals(RequestMapper.GLOBAL_POST_REQUEST_HANDLER)) { + LOGGER.info("Registering Global-post request handler:" + + handler); mapper.addGlobalPostRequestHandler(handler); } else { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3097 http://archive-access.svn.sourceforge.net/archive-access/?rev=3097&view=rev Author: bradtofel Date: 2010-05-17 19:44:24 +0000 (Mon, 17 May 2010) Log Message: ----------- TWEAK: added tests for resolving empty path Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-17 19:37:09 UTC (rev 3096) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-17 19:44:24 UTC (rev 3097) @@ -115,7 +115,11 @@ UrlOperations.resolveUrl(scheme + "a.org/3/","1/2")); assertEquals(scheme + "a.org/1/2", - UrlOperations.resolveUrl(scheme + "a.org/3","1/2")); + UrlOperations.resolveUrl(scheme + "a.org/3","1/2")); + assertEquals(scheme + "a.org/3", + UrlOperations.resolveUrl(scheme + "a.org/3","")); + assertEquals(scheme + "a.org/3.html", + UrlOperations.resolveUrl(scheme + "a.org/3.html","")); } } public void testUrlToScheme() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3096 http://archive-access.svn.sourceforge.net/archive-access/?rev=3096&view=rev Author: bradtofel Date: 2010-05-17 19:37:09 +0000 (Mon, 17 May 2010) Log Message: ----------- BUGFIX(unreported): now adds 'http' to request URL if scheme is missing. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-05-17 19:33:41 UTC (rev 3095) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-05-17 19:37:09 UTC (rev 3096) @@ -32,6 +32,7 @@ import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.Timestamp; +import org.archive.wayback.util.url.UrlOperations; import org.archive.wayback.webapp.AccessPoint; /** @@ -89,6 +90,12 @@ } // just jam everything else in: String val = AccessPoint.getMapParam(queryMap,key); + if(key.equals(WaybackRequest.REQUEST_URL)) { + String scheme = UrlOperations.urlToScheme(val); + if(scheme == null) { + val = UrlOperations.HTTP_SCHEME + val; + } + } wbRequest.put(key,val); } String partialTS = wbRequest.getReplayTimestamp(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3095 http://archive-access.svn.sourceforge.net/archive-access/?rev=3095&view=rev Author: bradtofel Date: 2010-05-17 19:33:41 +0000 (Mon, 17 May 2010) Log Message: ----------- FEATURE: now uses resource code to auto-detect and unchunkify chunked content. also does not forward on some hop-by-hop HTTP headers Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-05-17 19:28:20 UTC (rev 3094) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-05-17 19:33:41 UTC (rev 3095) @@ -26,19 +26,24 @@ package org.archive.wayback.liveweb; import java.io.IOException; +import java.util.Iterator; +import java.util.Map; import java.util.zip.GZIPInputStream; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HostConfiguration; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.log4j.Logger; import org.archive.io.arc.ARCRecord; +import org.archive.wayback.core.Resource; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; import org.archive.wayback.util.ByteOp; import org.archive.wayback.util.webapp.AbstractRequestHandler; @@ -56,6 +61,8 @@ */ public class 
ARCUnwrappingProxy extends AbstractRequestHandler { + private static final Logger LOGGER = + Logger.getLogger(ARCUnwrappingProxy.class.getName()); private MultiThreadedHttpConnectionManager connectionManager = null; private HostConfiguration hostConfiguration = null; /** @@ -75,7 +82,6 @@ sb.append("?").append(query); } HttpMethod method = new GetMethod(sb.toString()); -// method.addRequestHeader("User-Agent", userAgent); boolean got200 = false; try { HttpClient http = new HttpClient(connectionManager); @@ -87,14 +93,28 @@ new ARCRecord(new GZIPInputStream( method.getResponseBodyAsStream()), "id",0L,false,false,true); - r.skipHttpHeader(); - httpResponse.setStatus(r.getStatusCode()); - Header headers[] = r.getHttpHeaders(); - for(Header header : headers) { - httpResponse.addHeader(header.getName(), header.getValue()); + Resource res = null; + try { + res = ResourceFactory.ARCArchiveRecordToResource(r, null); + } catch (ResourceNotAvailableException e) { + LOGGER.error(e); + throw new IOException(e); + } + httpResponse.setStatus(res.getStatusCode()); + + Map<String,String> headers = res.getHttpHeaders(); + Iterator<String> keys = headers.keySet().iterator(); + while(keys.hasNext()) { + String key = keys.next(); + if(!key.equalsIgnoreCase("Connection") + && !key.equalsIgnoreCase("Content-Length") + && !key.equalsIgnoreCase("Transfer-Encoding")) { + String value = headers.get(key); + httpResponse.addHeader(key, value); + } } - ByteOp.copyStream(r, httpResponse.getOutputStream()); + ByteOp.copyStream(res, httpResponse.getOutputStream()); got200 = true; } } finally { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-17 19:28:26
|
Revision: 3094 http://archive-access.svn.sourceforge.net/archive-access/?rev=3094&view=rev Author: bradtofel Date: 2010-05-17 19:28:20 +0000 (Mon, 17 May 2010) Log Message: ----------- BUGFIX: now allows spaces(' ') after the chunk hex byte length when detecting chunked contents. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-05-17 19:27:12 UTC (rev 3093) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-05-17 19:28:20 UTC (rev 3094) @@ -115,7 +115,8 @@ // better be a hex character: if(isHex(nextC)) { hexFound++; - } else { + } else if(nextC != ' ') { + // allow whitespace before or after chunk... // not a hex digit: not a chunked stream. break; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3093 http://archive-access.svn.sourceforge.net/archive-access/?rev=3093&view=rev Author: bradtofel Date: 2010-05-17 19:27:12 +0000 (Mon, 17 May 2010) Log Message: ----------- INITIAL REV: leaner default implementation of current server-side rewrite rules. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java 2010-05-17 19:27:12 UTC (rev 3093) @@ -0,0 +1,346 @@ +/* FastArchivalUrlReplayParseEventHandler + * + * $Id$: + * + * Created on May 4, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.archivalurl; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.charset.Charset; +import java.util.HashMap; + +import javax.servlet.ServletException; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; +import org.archive.wayback.replay.html.transformer.BlockCSSStringTransformer; +import org.archive.wayback.replay.html.transformer.InlineCSSStringTransformer; +import org.archive.wayback.replay.html.transformer.JSStringTransformer; +import org.archive.wayback.replay.html.transformer.MetaRefreshUrlStringTransformer; +import org.archive.wayback.replay.html.transformer.URLStringTransformer; +import org.archive.wayback.util.htmllex.NodeUtils; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.ParseEventHandler; +import org.htmlparser.Node; +import org.htmlparser.nodes.TagNode; +import org.htmlparser.nodes.TextNode; + +/** + * Lean and mean ParseEventHandler implementing current best-known server-side + * HTML rewrite rules, and should be much faster than the fully configurable + * version. 
+ * + * @author brad + * + */ +public class FastArchivalUrlReplayParseEventHandler implements + ParseEventHandler { + + private final static String FERRET_DONE_KEY = + FastArchivalUrlReplayParseEventHandler.class.toString(); + + private String jspInsertPath = "/WEB-INF/replay/DisclaimChooser.jsp"; + + private final String[] okHeadTags = { "!DOCTYPE", "HTML", "HEAD", "BASE", + "LINK", "META", "TITLE", "STYLE", "SCRIPT", "BODY" }; + private HashMap<String, Object> okHeadTagMap = null; + private final static String FRAMESET_TAG = "FRAMESET"; + private final static String BODY_TAG = "BODY"; + + private static BlockCSSStringTransformer cssBlockTrans = + new BlockCSSStringTransformer(); + private static InlineCSSStringTransformer cssInlineTrans = + new InlineCSSStringTransformer(); + private static JSStringTransformer jsBlockTrans = + new JSStringTransformer(); + private static MetaRefreshUrlStringTransformer metaRefreshTrans = + new MetaRefreshUrlStringTransformer(); + private static URLStringTransformer anchorUrlTrans = + new URLStringTransformer(); + private static URLStringTransformer cssUrlTrans = + new URLStringTransformer("cs_"); + private static URLStringTransformer jsUrlTrans = + new URLStringTransformer("js_"); + private static URLStringTransformer imageUrlTrans = + new URLStringTransformer("im_"); + + /** Constructor... */ + public FastArchivalUrlReplayParseEventHandler() { + okHeadTagMap = new HashMap<String, Object>(okHeadTags.length); + for (String tag : okHeadTags) { + okHeadTagMap.put(tag, null); + } + } + + // TODO: This should all be refactored up into an abstract base class with + // default no-op methods, allowing a subclass to only override the ones they + // want... 
+ public void handleNode(ParseContext pContext, Node node) + throws IOException { + ReplayParseContext context = (ReplayParseContext) pContext; + if(NodeUtils.isRemarkNode(node)) { +// RemarkNode remarkNode = (RemarkNode) node; +// handleRemarkTextNode(context,remarkNode); + emit(context,null,node,null); + + } else if(NodeUtils.isTextNode(node)) { + TextNode textNode = (TextNode) node; + if(context.isInCSS()) { + handleCSSTextNode(context,textNode); + + } else if(context.isInScriptText()) { + handleJSTextNode(context,textNode); + } else { + emit(context,null,textNode,null); +// handleContentTextNode(context,textNode); + } + } else if(NodeUtils.isTagNode(node)) { + TagNode tagNode = (TagNode) node; + if(tagNode.isEndTag()) { + emit(context,null,tagNode,null); +// handleCloseTagNode(context,tagNode); + } else { + // assume start, possibly empty: + handleOpenTagNode(context,tagNode); + } + } else { + throw new IllegalArgumentException("Unknown node type.."); + } + } + + /** + * @param context + * @param textNode + * @throws IOException + */ + private void handleCSSTextNode(ReplayParseContext context, TextNode textNode) throws IOException { + textNode.setText(cssBlockTrans.transform(context, textNode.getText())); + emit(context,null,textNode,null); + } + /** + * @param context + * @param textNode + * @throws IOException + */ + private void handleJSTextNode(ReplayParseContext context, TextNode textNode) throws IOException { + textNode.setText(jsBlockTrans.transform(context, textNode.getText())); + emit(context,null,textNode,null); + } + + private void handleOpenTagNode(ReplayParseContext context, TagNode tagNode) + throws IOException { + + boolean insertedJsp = context.getData(FERRET_DONE_KEY) != null; + String preEmit = null; + String postEmit = null; + + String tagName = tagNode.getTagName(); + // Time to insert the JSP header? 
+ if(!insertedJsp) { + if(!okHeadTagMap.containsKey(tagName)) { + if(tagName.equals(FRAMESET_TAG)) { + // don't put the insert in framsets: + } else { + String tmp = null; + try { + tmp = + context.getJspExec().jspToString(jspInsertPath); + } catch (ServletException e) { + e.printStackTrace(); + } + if (tagName.equals(BODY_TAG)) { + // insert it now, *after* the current Tag: + postEmit = tmp; + } else { + // hrm... we are seeing a node that should be in + // the body.. lets emit the jsp now, *before* + // the current Tag: + preEmit = tmp; + } + } + context.putData(FERRET_DONE_KEY,""); + } + } + // now do all the usual attribute rewriting: + // this could be slightly optimized by moving tags more likely to occur + // to the front of the if/else if/else if routing... + + if(tagName.equals("A")) { + transformAttr(context, tagNode, "HREF", anchorUrlTrans); + + } else if(tagName.equals("APPLET")) { + transformAttr(context, tagNode, "CODEBASE", anchorUrlTrans); + transformAttr(context, tagNode, "ARCHIVE", anchorUrlTrans); + + } else if(tagName.equals("AREA")) { + transformAttr(context, tagNode, "HREF", anchorUrlTrans); + + } else if(tagName.equals("BASE")) { + String orig = tagNode.getAttribute("HREF"); + if(orig != null) { + try { + context.setBaseUrl(new URL(orig)); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + } + + } else if(tagName.equals("EMBED")) { + transformAttr(context, tagNode, "SRC", anchorUrlTrans); + + } else if(tagName.equals("IFRAME")) { + transformAttr(context, tagNode, "SRC", anchorUrlTrans); + + } else if(tagName.equals("IMG")) { + transformAttr(context, tagNode, "SRC", imageUrlTrans); + + } else if(tagName.equals("INPUT")) { + transformAttr(context, tagNode, "SRC", imageUrlTrans); + + } else if(tagName.equals("FORM")) { + transformAttr(context, tagNode, "ACTION", anchorUrlTrans); + + } else if(tagName.equals("FRAME")) { + transformAttr(context, tagNode, "SRC", anchorUrlTrans); + + } else if(tagName.equals("LINK")) { + 
if(transformAttrWhere(context, tagNode, "REL", "STYLESHEET", + "HREF",cssUrlTrans)) { + // no-op + } else if(transformAttrWhere(context,tagNode,"REL","SHORTCUT ICON", + "HREF", imageUrlTrans)) { + // no-op + } else { + transformAttr(context, tagNode, "HREF", anchorUrlTrans); + } + + } else if(tagName.equals("META")) { + transformAttrWhere(context, tagNode, "HTTP-EQUIV", "REFRESH", + "CONTENT", metaRefreshTrans); + transformAttr(context, tagNode, "URL", anchorUrlTrans); + + } else if(tagName.equals("OBJECT")) { + transformAttr(context, tagNode, "CODEBASE", anchorUrlTrans); + transformAttr(context, tagNode, "CDATA", anchorUrlTrans); + + } else if(tagName.equals("SCRIPT")) { + transformAttr(context, tagNode, "SRC", jsUrlTrans); + } + // now, for *all* tags... + transformAttr(context,tagNode,"BACKGROUND", imageUrlTrans); + transformAttr(context,tagNode,"STYLE", cssInlineTrans); + transformAttr(context,tagNode,"onclick", jsBlockTrans); + + emit(context,preEmit,tagNode,postEmit); + } + + private void emit(ReplayParseContext context, String pre, Node node, + String post) throws IOException { + + OutputStream out = context.getOutputStream(); + if(out != null) { + Charset charset = Charset.forName(context.getOutputCharset()); + + if(pre != null) { + + out.write(pre.getBytes(charset)); + } + + out.write(node.toHtml(true).getBytes(charset)); + + if(post != null) { + + out.write(post.getBytes(charset)); + } + } + } + + /** + * Transform a particular attribute on a TagNode, if that TagNode has a + * previous value for the updated attribute, AND if that TagNode contains + * another named attribute with a specific value. + * + * @param context the ReplayParseContext + * @param node the TagNode to be updated + * @param attrName update only occurs if the TagNode has an attribute with + * this name. + * @param attrVal update only occurs if the TagNode has an attribute + * attrName has this value, case insensitive. 
In fact as an optimization, + * it is ASSUMED that this argument is already UPPER-CASED + * @param modAttr the attribute value to update + * @param transformer the StringTransformer responsible for creating the + * new value based on the old one. + * @return true if the attribute was updated. + */ + private boolean transformAttrWhere(ReplayParseContext context, TagNode node, + String attrName, String attrVal, String modAttr, + StringTransformer transformer) { + String val = node.getAttribute(attrName); + if(val != null) { + if(val.toUpperCase().equals(attrVal)) { + return transformAttr(context,node,modAttr,transformer); + } + } + return false; + } + /** + * Transform a particular attribute on a TagNode, iff that attribute exists + * + * @param context The ReplayParseContext being transformed + * @param node the TagNode to update + * @param attr the attribute name to transform + * @param transformer the StringTransformer responsible for creating the + * new value + * @return true if the attribute was found and updated + */ + private boolean transformAttr(ReplayParseContext context, TagNode node, + String attr, StringTransformer transformer) { + String orig = node.getAttribute(attr); + if(orig != null) { + node.setAttribute(attr, + transformer.transform(context, orig)); + return true; + } + return false; + } + public void handleParseComplete(ParseContext context) throws IOException { + // Nothing to do. 
+ } + + /** + * @return the jspInsertPath + */ + public String getJspInsertPath() { + return jspInsertPath; + } + + /** + * @param jspInsertPath the jspInsertPath to set + */ + public void setJspInsertPath(String jspInsertPath) { + this.jspInsertPath = jspInsertPath; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandler.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3092 http://archive-access.svn.sourceforge.net/archive-access/?rev=3092&view=rev Author: bradtofel Date: 2010-05-17 19:26:12 +0000 (Mon, 17 May 2010) Log Message: ----------- TWEAK: changed declared event delegator to ParseEventHandler Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2010-05-14 22:52:50 UTC (rev 3091) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlSAXRewriteReplayRenderer.java 2010-05-17 19:26:12 UTC (rev 3092) @@ -44,11 +44,12 @@ import org.archive.wayback.replay.HttpHeaderOperation; import org.archive.wayback.replay.HttpHeaderProcessor; import org.archive.wayback.replay.JSPExecutor; +import org.archive.wayback.replay.TextReplayRenderer; import org.archive.wayback.replay.charset.CharsetDetector; import org.archive.wayback.replay.charset.StandardCharsetDetector; -import org.archive.wayback.replay.html.ReplayParseEventDelegator; import org.archive.wayback.replay.html.ReplayParseContext; import org.archive.wayback.util.htmllex.ContextAwareLexer; +import org.archive.wayback.util.htmllex.ParseEventHandler; import org.htmlparser.Node; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; @@ -62,7 +63,7 @@ * */ public class ArchivalUrlSAXRewriteReplayRenderer implements ReplayRenderer { - private ReplayParseEventDelegator delegator = null; + private ParseEventHandler delegator = null; private HttpHeaderProcessor httpHeaderProcessor; private CharsetDetector charsetDetector = new 
StandardCharsetDetector(); private final static String OUTPUT_CHARSET = "utf-8"; @@ -142,7 +143,7 @@ // set the corrected length: headers.put(HttpHeaderOperation.HTTP_LENGTH_HEADER, String.valueOf(utf8Bytes.length)); - headers.put("X-Wayback-Guessed-Charset", charSet); + headers.put(TextReplayRenderer.GUESSED_CHARSET_HEADER, charSet); // send back the headers: HttpHeaderOperation.sendHeaders(headers, httpResponse); @@ -173,14 +174,14 @@ /** * @return the delegator */ - public ReplayParseEventDelegator getDelegator() { + public ParseEventHandler getDelegator() { return delegator; } /** * @param delegator the delegator to set */ - public void setDelegator(ReplayParseEventDelegator delegator) { + public void setDelegator(ParseEventHandler delegator) { this.delegator = delegator; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-14 22:52:56
|
Revision: 3091 http://archive-access.svn.sourceforge.net/archive-access/?rev=3091&view=rev Author: bradtofel Date: 2010-05-14 22:52:50 +0000 (Fri, 14 May 2010) Log Message: ----------- FEATURE: added discardStream() method JAVADOC Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2010-05-13 18:34:37 UTC (rev 3090) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/ByteOp.java 2010-05-14 22:52:50 UTC (rev 3091) @@ -28,34 +28,100 @@ import java.io.InputStream; import java.io.OutputStream; +/** + * Byte oriented static methods. Likely a lot of overlap with apache- commons + * stuff - eventually should be reconciled. + * + * @author brad + * + */ public class ByteOp { + /** Default buffer size for IO ops */ public final static int BUFFER_SIZE = 4096; + /** + * Create a new byte array with contents initialized to values from the + * argument byte array. + * @param src source byte array of initial values + * @param offset start offset to copy bytes + * @param length number of bytes to copy + * @return a new byte array of size length, containing values from src + * starting from offset in the src array. 
+ */ public static byte[] copy(byte[] src, int offset, int length) { byte[] copy = new byte[length]; System.arraycopy(src, offset, copy, 0, length); return copy; } - public static boolean cmp(byte[] input, byte[] want) { - if(input.length != want.length) { + + /** + * Compare two byte arrays + * @param a byte array to compare + * @param b byte array to compare + * @return true if a and b have same length, and all the same values, false + * otherwise + */ + public static boolean cmp(byte[] a, byte[] b) { + if(a.length != b.length) { return false; } - for(int i = 0; i < input.length; i++) { - if(input[i] != want[i]) { + for(int i = 0; i < a.length; i++) { + if(a[i] != b[i]) { return false; } } return true; } + /** + * throw away all bytes from stream argument + * @param is InputStream to read and discard + * @throws IOException when is throws one + */ public static void discardStream(InputStream is) throws IOException { discardStream(is,BUFFER_SIZE); } + + /** + * throw away all bytes from stream argument + * @param is InputStream to read and discard + * @param size number of bytes to read at once from the stream + * @throws IOException when is throws one + */ public static void discardStream(InputStream is,int size) throws IOException { byte[] buffer = new byte[size]; while(is.read(buffer, 0, size) != -1) { } } + + /** + * throw away all bytes from stream argument, and count how many bytes were + * discarded before reaching the end of the stream. + * @param is InputStream to read and discard + * @return the number of bytes discarded + * @throws IOException when is throws one + */ + public static long discardStreamCount(InputStream is) throws IOException { + return discardStreamCount(is, BUFFER_SIZE); + } + + /** + * throw away all bytes from stream argument, and count how many bytes were + * discarded before reaching the end of the stream. 
+ * @param is InputStream to read and discard + * @param size number of bytes to read at once from the stream + * @return the number of bytes discarded + * @throws IOException when is throws one + */ + public static long discardStreamCount(InputStream is,int size) throws IOException { + long count = 0; + byte[] buffer = new byte[size]; + int amt = 0; + while((amt = is.read(buffer, 0, size)) != -1) { + count += amt; + } + return count; + } /** * Write all bytes from is to os. Does not close either stream. @@ -67,6 +133,7 @@ throws IOException { copyStream(is,os,BUFFER_SIZE); } + /** * Write all bytes from is to os. Does not close either stream. * @param is to copy bytes from This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-13 18:34:44
|
Revision: 3090 http://archive-access.svn.sourceforge.net/archive-access/?rev=3090&view=rev Author: bradtofel Date: 2010-05-13 18:34:37 +0000 (Thu, 13 May 2010) Log Message: ----------- FEATURE: added new method stripDefaultPort() and tests for that TWEAK: Updated TLD list Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-05-07 23:11:24 UTC (rev 3089) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-05-13 18:34:37 UTC (rev 3090) @@ -101,31 +101,29 @@ */ public final static char PATH_START = '/'; - - private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" + - "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" + - "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" + - "|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo" + - "|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk" + - "|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg" + - "|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma" + - "|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz" + - "|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm" + - "|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj" + - "|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn" + - "|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu" + - "|wf|ws|ye|yt|yu|za|zm|zw"; - - private static 
final String GEN_TLDS = "aero|biz|cat|com|coop|edu|gov" + - "|info|int|jobs|mil|mobi|museum|name|net|org|pro|travel"; - - - private static final String ALL_TLD_PATTERN = CC_TLDS + "|" + GEN_TLDS; + private static final String ALL_TLDS = "ac|ad|ae|aero|af|ag|ai|al|am|an" + + "|ao|aq|ar|arpa|as|asia|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi" + + "|biz|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cat|cc|cd|cf|cg|ch|ci" + + "|ck|cl|cm|cn|co|com|coop|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec" + + "|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh" + + "|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id" + + "|ie|il|im|in|info|int|io|iq|ir|is|it|je|jm|jo|jobs|jp|ke|kg|kh" + + "|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc" + + "|md|me|mg|mh|mil|mk|ml|mm|mn|mo|mobi|mp|mq|mr|ms|mt|mu|museum" + + "|mv|mw|mx|my|mz|na|name|nc|ne|net|nf|ng|ni|nl|no|np|nr|nu|nz" + + "|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pro|ps|pt|pw|py|qa|re|ro" + + "|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv" + + "|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|travel|tt|tv" + + "|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|xn--0zwm56d" + + "|xn--11b5bs3a9aj6g|xn--80akhbyknj4f|xn--9t4b11yi5a|xn--deba0ad" + + "|xn--g6w251d|xn--hgbk6aj7f53bba|xn--hlcj6aya9esc7a|xn--jxalpdlp" + + "|xn--kgbechtv|xn--mgbaam7a8h|xn--mgberp4a5d4ar|xn--p1ai" + + "|xn--wgbh1c|xn--zckzah|ye|yt|za|zm|zw"; private static final String IP_PATTERN = "[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+"; private static final Pattern AUTHORITY_REGEX = - Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLD_PATTERN + "))|" + + Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLDS + "))|" + "(" + IP_PATTERN + ")"); // private static final Pattern AUTHORITY_REGEX_SIMPLE = @@ -244,7 +242,48 @@ return url.substring(pathIdx); } } + + /** + * Attempt to strip default ports out of URL strings. 
+ * @param url the original URL possibly including a port + * @return the URL sans port, if the scheme was recognized and the default + * port was supplied, otherwise, the original URL. + */ + public static String stripDefaultPortFromUrl(String url) { + String scheme = urlToScheme(url); + if(scheme == null) { + return url; + } + int defaultPort = schemeToDefaultPort(scheme); + if(defaultPort == -1) { + return url; + } + String portStr = null; + // is there a slash after the scheme? + int slashIdx = url.indexOf('/', scheme.length()); + if(slashIdx == -1) { + portStr = String.format(":%d", defaultPort); + if(url.endsWith(portStr)) { + return url.substring(0,url.length() - portStr.length()); + } + } + portStr = String.format(":%d/", defaultPort); + int idx = url.indexOf(portStr); + if(idx == -1) { + return url; + } + // if that occurred before the first / (after the scheme) then strip it: + if(slashIdx < idx) { + return url; + } + // we want to strip out the portStr: + StringBuilder sb = new StringBuilder(url.length()); + sb.append(url.substring(0,idx)); + sb.append(url.substring(idx + (portStr.length()-1))); + return sb.toString(); + } + /** * Attempt to extract the hostname component of an absolute URL argument. 
* @param url the url String from which to extract the hostname Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-07 23:11:24 UTC (rev 3089) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-05-13 18:34:37 UTC (rev 3090) @@ -161,4 +161,35 @@ } + + public void testStripDefaultPort() { + assertSDP("http://foo.com/","http://foo.com/"); + assertSDP("http://foo.com","http://foo.com"); + assertSDP("http://foo.com","http://foo.com:80"); + assertSDP("foo.com:80/","foo.com:80/"); + assertSDP("http://foo.com:8080/","http://foo.com:8080/"); + assertSDP("http://foo.com:8081/","http://foo.com:8081/"); + assertSDP("https://foo.com:8081/","https://foo.com:8081/"); + assertSDP("https://foo.com/","https://foo.com:443/"); + assertSDP("https://foo.com","https://foo.com:443"); + assertSDP("ftp://foo.com/","ftp://foo.com/"); + assertSDP("ftp://foo.com","ftp://foo.com"); + assertSDP("ftp://foo.com:1234","ftp://foo.com:1234"); + assertSDP("ftp://foo.com","ftp://foo.com:21"); + assertSDP("ftp://foo.com/","ftp://foo.com:21/"); + assertSDP("ftp://foo.com/bla","ftp://foo.com:21/bla"); + assertSDP("s3://foo.com/","s3://foo.com/"); + assertSDP("s3://foo.com/bar","s3://foo.com/bar"); + assertSDP("s3://foo.com:80/bar","s3://foo.com:80/bar"); + assertSDP("http://b...@fo.../bar","http://b...@fo...:80/bar"); + assertSDP("http://b...@fo.../bar","http://b...@fo.../bar"); + assertSDP("http://b:80...@fo.../bar","http://b:80...@fo.../bar"); + assertSDP("http://b:80...@fo.../bar","http://b:80...@fo...:80/bar"); + assertSDP("http://b:80...@fo...:8080/ba","http://b:80...@fo...:8080/ba"); + } + private void assertSDP(String want, String orig) { + String got = 
UrlOperations.stripDefaultPortFromUrl(orig); + assertEquals(want,got); + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-07 23:11:33
|
Revision: 3089 http://archive-access.svn.sourceforge.net/archive-access/?rev=3089&view=rev Author: bradtofel Date: 2010-05-07 23:11:24 +0000 (Fri, 07 May 2010) Log Message: ----------- BUGFIX: was setting path prefix to "//" for requests to "/" Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java 2010-05-07 23:10:16 UTC (rev 3088) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/webapp/RequestMapper.java 2010-05-07 23:11:24 UTC (rev 3089) @@ -148,7 +148,7 @@ portMapper.addRequestHandler(host, path, requestHandler); } - private RequestHandlerContext mapRequest(HttpServletRequest request) { + public RequestHandlerContext mapRequest(HttpServletRequest request) { RequestHandlerContext handlerContext = null; int port = request.getLocalPort(); @@ -181,8 +181,12 @@ if(handlerContext != null) { RequestHandler requestHandler = handlerContext.getRequestHandler(); - request.setAttribute(REQUEST_CONTEXT_PREFIX, - handlerContext.getPathPrefix() + "/"); + // need to add trailing "/" iff prefix is not "/": + String pathPrefix = handlerContext.getPathPrefix(); + if(!pathPrefix.equals("/")) { + pathPrefix += "/"; + } + request.setAttribute(REQUEST_CONTEXT_PREFIX,pathPrefix); handled = requestHandler.handleRequest(request, response); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3088 http://archive-access.svn.sourceforge.net/archive-access/?rev=3088&view=rev Author: bradtofel Date: 2010-05-07 23:10:16 +0000 (Fri, 07 May 2010) Log Message: ----------- FEATURE: now by default, blocks "alexa/dat" MIME records which may have gotten into the index... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java 2010-05-07 23:09:30 UTC (rev 3087) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java 2010-05-07 23:10:16 UTC (rev 3088) @@ -32,6 +32,7 @@ import org.archive.wayback.resourceindex.filters.ConditionalGetAnnotationFilter; import org.archive.wayback.resourceindex.filters.DuplicateRecordFilter; import org.archive.wayback.resourceindex.filters.GuardRailFilter; +import org.archive.wayback.resourceindex.filters.MimeTypeFilter; import org.archive.wayback.resourceindex.filters.UserInfoInAuthorityFilter; import org.archive.wayback.resourceindex.filters.WARCRevisitAnnotationFilter; import org.archive.wayback.util.ObjectFilter; @@ -39,6 +40,8 @@ public class CoreCaptureFilterGroup implements CaptureFilterGroup { private ObjectFilterChain<CaptureSearchResult> chain = null; + private MimeTypeFilter mimeExcludeFilter = new MimeTypeFilter(); + private static String ALEXA_DAT_MIME = "alexa/dat"; public CoreCaptureFilterGroup(LocalResourceIndex index) { chain = new ObjectFilterChain<CaptureSearchResult>(); @@ -48,7 +51,11 @@ chain.addFilter(new WARCRevisitAnnotationFilter()); 
chain.addFilter(new ConditionalGetAnnotationFilter()); } + MimeTypeFilter mimeExcludeFilter = new MimeTypeFilter(); + mimeExcludeFilter.addMime(ALEXA_DAT_MIME); + mimeExcludeFilter.setIncludeIfContains(false); chain.addFilter(new UserInfoInAuthorityFilter()); + chain.addFilter(mimeExcludeFilter); } public List<ObjectFilter<CaptureSearchResult>> getFilters() { return chain.getFilters(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3087 http://archive-access.svn.sourceforge.net/archive-access/?rev=3087&view=rev Author: bradtofel Date: 2010-05-07 23:09:30 +0000 (Fri, 07 May 2010) Log Message: ----------- FEATURE: added an inverting "includeIfContains" setting, which allows blocking of specific MIME-TYPEs from the index Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java 2010-05-07 23:07:58 UTC (rev 3086) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/MimeTypeFilter.java 2010-05-07 23:09:30 UTC (rev 3087) @@ -38,6 +38,7 @@ */ public class MimeTypeFilter implements ObjectFilter<CaptureSearchResult> { private HashMap<String,Integer> validMimes = null; + private boolean includeIfContains = true; /** * @param mime String which is valid match for mime-type field @@ -46,7 +47,7 @@ if(validMimes == null) { validMimes = new HashMap<String, Integer>(); } - validMimes.put(mime.toLowerCase(),new Integer(1)); + validMimes.put(mime.toLowerCase(),null); } /* (non-Javadoc) @@ -54,6 +55,21 @@ */ public int filterObject(CaptureSearchResult r) { String mime = r.getMimeType().toLowerCase(); - return validMimes.containsKey(mime) ? FILTER_INCLUDE : FILTER_EXCLUDE; + return validMimes.containsKey(mime) == includeIfContains ? 
+ FILTER_INCLUDE : FILTER_EXCLUDE; } + + /** + * @return the includeIfContains + */ + public boolean isIncludeIfContains() { + return includeIfContains; + } + + /** + * @param includeIfContains the includeIfContains to set + */ + public void setIncludeIfContains(boolean includeIfContains) { + this.includeIfContains = includeIfContains; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3086 http://archive-access.svn.sourceforge.net/archive-access/?rev=3086&view=rev Author: bradtofel Date: 2010-05-07 23:07:58 +0000 (Fri, 07 May 2010) Log Message: ----------- JAVADOC Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-05-07 23:04:35 UTC (rev 3085) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-05-07 23:07:58 UTC (rev 3086) @@ -27,12 +27,19 @@ import org.archive.wayback.replay.html.ReplayParseContext; import org.archive.wayback.replay.html.StringTransformer; +/** + * @author brad + * + */ public class URLStringTransformer implements StringTransformer { private static final String MAILTO_PREFIX = "mailto:"; private String flags; - public URLStringTransformer() { - - } + /** Default constructor */ + public URLStringTransformer() {} + /** + * Flag-setting constructor + * @param flags flags to pass to ReplayParseContext.contextualizeUrl() + */ public URLStringTransformer(String flags) { this.flags = flags; } @@ -44,16 +51,12 @@ return context.contextualizeUrl(url, flags); } - /** - * @return the flags - */ + /** @return the flags */ public String getFlags() { return flags; } - /** - * @param flags the flags to set - */ + /** @param flags the flags to set */ public void setFlags(String flags) { this.flags = flags; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3085 http://archive-access.svn.sourceforge.net/archive-access/?rev=3085&view=rev Author: bradtofel Date: 2010-05-07 23:04:35 +0000 (Fri, 07 May 2010) Log Message: ----------- TWEAK: added constructor which allows flags to be set at construction time Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-04-30 21:15:45 UTC (rev 3084) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java 2010-05-07 23:04:35 UTC (rev 3085) @@ -30,6 +30,12 @@ public class URLStringTransformer implements StringTransformer { private static final String MAILTO_PREFIX = "mailto:"; private String flags; + public URLStringTransformer() { + + } + public URLStringTransformer(String flags) { + this.flags = flags; + } public String transform(ReplayParseContext context, String url) { if(url.startsWith(MAILTO_PREFIX)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-30 21:15:53
|
Revision: 3084 http://archive-access.svn.sourceforge.net/archive-access/?rev=3084&view=rev Author: bradtofel Date: 2010-04-30 21:15:45 +0000 (Fri, 30 Apr 2010) Log Message: ----------- FEATURE: made HTTP Header guessedCharsetHeader field bean configurable, and changed its default value. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-04-30 02:16:27 UTC (rev 3083) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextReplayRenderer.java 2010-04-30 21:15:45 UTC (rev 3084) @@ -50,6 +50,9 @@ */ public abstract class TextReplayRenderer implements ReplayRenderer { + private static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset"; + + private String guessedCharsetHeader = GUESSED_CHARSET_HEADER; private List<String> jspInserts = null; private HttpHeaderProcessor httpHeaderProcessor; private CharsetDetector charsetDetector = new StandardCharsetDetector(); @@ -89,16 +92,20 @@ // set the corrected length: int bytes = page.getBytes().length; + headers.put(HttpHeaderOperation.HTTP_LENGTH_HEADER, String.valueOf(bytes)); + if(guessedCharsetHeader != null) { + headers.put(guessedCharsetHeader, page.getCharSet()); + } + + // send back the headers: + HttpHeaderOperation.sendHeaders(headers, httpResponse); + // Tomcat will always send a charset... It's trying to be smarter than // we are. If the original page didn't include a "charset" as part of // the "Content-Type" HTTP header, then Tomcat will use the default.. // who knows what that is, or what that will do to the page.. 
// let's try explicitly setting it to what we used: - headers.put("X-Wayback-Guessed-Charset", page.getCharSet()); - - // send back the headers: - HttpHeaderOperation.sendHeaders(headers, httpResponse); httpResponse.setCharacterEncoding(page.getCharSet()); page.writeToOutputStream(httpResponse.getOutputStream()); @@ -131,4 +138,21 @@ public void setCharsetDetector(CharsetDetector charsetDetector) { this.charsetDetector = charsetDetector; } + + /** + * @return the String HTTP Header used to indicate what Wayback determined + * was the pages original charset + */ + public String getGuessedCharsetHeader() { + return guessedCharsetHeader; + } + + /** + * @param guessedCharsetHeader the String HTTP Header value used to indicate + * to clients what Wayback determined was the pages original charset. If set + * to null, the header will be omitted. + */ + public void setGuessedCharsetHeader(String guessedCharsetHeader) { + this.guessedCharsetHeader = guessedCharsetHeader; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3083 http://archive-access.svn.sourceforge.net/archive-access/?rev=3083&view=rev Author: bradtofel Date: 2010-04-30 02:16:27 +0000 (Fri, 30 Apr 2010) Log Message: ----------- TWEAK: removed System.out.format() debug line... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-30 02:14:51 UTC (rev 3082) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-30 02:16:27 UTC (rev 3083) @@ -62,23 +62,23 @@ private static final TimeZone TZ_UTC = TimeZone.getTimeZone("UTC"); - private static String joinInts(int[] a) { - StringBuilder sb = new StringBuilder(); - boolean first = true; - for(int i : a) { - if(first) { - sb.append(i); - first = false; - } else { - sb.append(",").append(i); - } - } - return sb.toString(); - } - private static void printAr(String name, int o[], int n[]) { - System.out.format("%s=========\nORIG(%s)\nNORM(%s)\n", - name,joinInts(o),joinInts(n)); - } +// private static String joinInts(int[] a) { +// StringBuilder sb = new StringBuilder(); +// boolean first = true; +// for(int i : a) { +// if(first) { +// sb.append(i); +// first = false; +// } else { +// sb.append(",").append(i); +// } +// } +// return sb.toString(); +// } +// private static void printAr(String name, int o[], int n[]) { +// System.out.format("%s=========\nORIG(%s)\nNORM(%s)\n", +// name,joinInts(o),joinInts(n)); +// } private static int normalizeInt(int input, int localMax, int maxOutput) { double ln = Math.log(localMax); @@ -108,7 +108,7 @@ if(input[i] > localMax) localMax = 
input[i]; } if(localMax < max) { - printAr("No normalization",input,input); +// printAr("No normalization",input,input); return input; } int normalized[] = new int[input.length]; @@ -123,8 +123,8 @@ double pct = iln / ln; double num = pct * max; int idx = (int) num; - System.out.format("%d - %d - %f - %f - %f - %f : %d\n", - i,input[i],ln,iln,pct,num,idx); +// System.out.format("%d - %d - %f - %f - %f - %f : %d\n", +// i,input[i],ln,iln,pct,num,idx); if(input[i] < idx) { normalized[i] = input[i]; } else { @@ -132,7 +132,7 @@ } } } - printAr("NORMALIZED",input,normalized); +// printAr("NORMALIZED",input,normalized); return normalized; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3082 http://archive-access.svn.sourceforge.net/archive-access/?rev=3082&view=rev Author: bradtofel Date: 2010-04-30 02:14:51 +0000 (Fri, 30 Apr 2010) Log Message: ----------- TWEAK: removed System.out.format() debug line... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-27 22:47:37 UTC (rev 3081) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/partition/PartitionsToGraph.java 2010-04-30 02:14:51 UTC (rev 3082) @@ -92,8 +92,8 @@ double pct = iln / ln; double num = pct * maxOutput; int idx = (int) num; - System.out.format("%d - %f - %f - %f - %f : %d\n", - input,ln,iln,pct,num,idx); +// System.out.format("%d - %f - %f - %f - %f : %d\n", +// input,ln,iln,pct,num,idx); if(input < idx) { return input; } else { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-27 22:47:45
|
Revision: 3081 http://archive-access.svn.sourceforge.net/archive-access/?rev=3081&view=rev Author: bradtofel Date: 2010-04-27 22:47:37 +0000 (Tue, 27 Apr 2010) Log Message: ----------- MAJOR REFACTOR of AccessPoint ServletContext linkage. Tons of code moved around, with a goal of not actually busting Wayback XML Spring configuration too badly.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-04-27 22:45:40 UTC (rev 3080) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -33,13 +33,13 @@ import org.apache.commons.httpclient.URIException; import org.apache.log4j.Logger; -import org.archive.wayback.webapp.ServletRequestContext; +import org.archive.wayback.util.webapp.AbstractRequestHandler; /** * @author brad * */ -public class ARCRecordingProxy extends ServletRequestContext { +public class ARCRecordingProxy extends AbstractRequestHandler { private final static String EXPIRES_HEADER = "Expires"; private long expiresMS = 60 * 60 * 1000; @@ -52,10 +52,7 @@ private ARCCacheDirectory arcCacheDir = null; private URLtoARCCacher cacher = null; - /* (non-Javadoc) - * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) - */ - @Override + public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -40,7 +40,7 @@ import org.apache.commons.httpclient.methods.GetMethod; import org.archive.io.arc.ARCRecord; import org.archive.wayback.util.ByteOp; -import org.archive.wayback.webapp.ServletRequestContext; +import org.archive.wayback.util.webapp.AbstractRequestHandler; /** * @@ -54,7 +54,7 @@ * @author brad * */ -public class ARCUnwrappingProxy extends ServletRequestContext { +public class ARCUnwrappingProxy extends 
AbstractRequestHandler { private MultiThreadedHttpConnectionManager connectionManager = null; private HostConfiguration hostConfiguration = null; @@ -66,13 +66,6 @@ hostConfiguration = new HostConfiguration(); } -// protected HttpClient http = new HttpClient( -// new MultiThreadedHttpConnectionManager()); - - /* (non-Javadoc) - * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) - */ - @Override public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { @@ -81,7 +74,6 @@ if(query != null) { sb.append("?").append(query); } -// URL url = new URL(sb.toString()); HttpMethod method = new GetMethod(sb.toString()); // method.addRequestHeader("User-Agent", userAgent); boolean got200 = false; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/BaseRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -78,34 +78,6 @@ AccessPoint wbContext) throws BadQueryException, BetterRequestException; - protected static String getMapParam(Map<String,String[]> queryMap, - String field) { - String arr[] = queryMap.get(field); - if (arr == null || arr.length == 0) { - return null; - } - return arr[0]; - } - - protected static String getRequiredMapParam(Map<String,String[]> queryMap, - String field) - throws BadQueryException { - String value = getMapParam(queryMap,field); - if(value == null) { - throw new BadQueryException("missing field " + field); - } - if(value.length() == 0) { - throw new 
BadQueryException("empty field " + field); - } - return value; - } - - protected static String getMapParamOrEmpty(Map<String,String[]> map, - String param) { - String val = getMapParam(map,param); - return (val == null) ? "" : val; - } - /** * @return the maxRecords to use with this RequestParser, when not specified * by the client request Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -59,7 +59,7 @@ * WaybackRequest object, except the Submit button argument. */ public WaybackRequest parse(HttpServletRequest httpRequest, - AccessPoint wbContext) { + AccessPoint accessPoint) { WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") @@ -67,7 +67,7 @@ if(queryMap.size() > 0) { wbRequest = new WaybackRequest(); - String base = wbContext.translateRequestPath(httpRequest); + String base = accessPoint.translateRequestPath(httpRequest); if(base.startsWith(REPLAY_BASE)) { wbRequest.setReplayRequest(); } else if(base.startsWith(QUERY_BASE)) { @@ -88,7 +88,7 @@ continue; } // just jam everything else in: - String val = getMapParam(queryMap,key); + String val = AccessPoint.getMapParam(queryMap,key); wbRequest.put(key,val); } String partialTS = wbRequest.getReplayTimestamp(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -88,7 +88,7 @@ WaybackRequest wbRequest = null; @SuppressWarnings("unchecked") Map<String,String[]> queryMap = httpRequest.getParameterMap(); - String query = getMapParam(queryMap, SEARCH_QUERY); + String query = AccessPoint.getMapParam(queryMap, SEARCH_QUERY); if(query == null) { return null; } @@ -107,8 +107,8 @@ return null; } - String numResults = getMapParam(queryMap, SEARCH_RESULTS); - String startPage = getMapParam(queryMap, START_PAGE); + String numResults = AccessPoint.getMapParam(queryMap, SEARCH_RESULTS); + String startPage = AccessPoint.getMapParam(queryMap, START_PAGE); if (numResults != null) { int nr = Integer.parseInt(numResults); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/PathRequestParser.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -33,7 +33,7 @@ /** * Subclass of RequestParser that acquires key request information from the - * path component following the wayback context. + * path component within the handling AccessPoint. 
* * @author brad * @version $Date$, $Revision$ @@ -41,43 +41,35 @@ public abstract class PathRequestParser extends WrappedRequestParser { /** - * @param wrapped + * @param wrapped the BaseRequestParser being wrapped */ public PathRequestParser(BaseRequestParser wrapped) { super(wrapped); } /** - * @param requestPath - * @param acessPoint - * @return WaybackRequest with information parsed from the requestPath, or - * null if information could not be extracted. - * @throws BetterRequestException - */ + * attempt to transform an incoming HttpServletRequest into a + * WaybackRequest object. returns null if there is missing information. + * + * @param requestPath the AccessPoint relative path as received by the + * AccessPoint + * @param accessPoint AccessPoint which is attempting to parse the request + * @return populated WaybackRequest object if successful, null otherwise. + * @throws BadQueryException if the request could match this AccessPoint, + * but is malformed: invalid datespec, URL, or flags + * @throws BetterRequestException if the request should be redirected to + * provide better user feedback (corrected URL/date in address bar) + */ public abstract WaybackRequest parse(String requestPath, - AccessPoint acessPoint) throws BetterRequestException; + AccessPoint accessPoint) throws BetterRequestException, + BadQueryException; - /* (non-Javadoc) - * @see org.archive.wayback.requestparser.BaseRequestParser#parse(javax.servlet.http.HttpServletRequest, org.archive.wayback.webapp.WaybackContext) - */ - @Override public WaybackRequest parse(HttpServletRequest httpRequest, - AccessPoint acessPoint) + AccessPoint accessPoint) throws BadQueryException, BetterRequestException { - String queryString = httpRequest.getQueryString(); - String origRequestPath = httpRequest.getRequestURI(); - - if (queryString != null) { - origRequestPath += "?" 
+ queryString; - } - String contextPath = acessPoint.getContextPath(httpRequest); - if (!origRequestPath.startsWith(contextPath)) { - return null; - } - String requestPath = origRequestPath.substring(contextPath.length()); - - WaybackRequest wbRequest = parse(requestPath, acessPoint); + String requestPath = accessPoint.translateRequestPathQuery(httpRequest); + WaybackRequest wbRequest = parse(requestPath, accessPoint); if(wbRequest != null) { wbRequest.setResultsPerPage(getMaxRecords()); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -45,7 +45,7 @@ import org.archive.util.anvl.ANVLRecord; import org.archive.wayback.util.http.HttpRequestMessage; import org.archive.wayback.util.http.HttpResponse; -import org.archive.wayback.webapp.ServletRequestContext; +import org.archive.wayback.util.webapp.AbstractRequestHandler; /** * ServletRequestContext interface which uses a ResourceFileLocationDB to @@ -56,7 +56,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class FileProxyServlet extends ServletRequestContext { +public class FileProxyServlet extends AbstractRequestHandler { private static final Logger LOGGER = Logger.getLogger(FileProxyServlet.class .getName()); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -34,7 +34,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.archive.wayback.webapp.ServletRequestContext; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.util.webapp.AbstractRequestHandler; /** * ServletRequestContext enabling remote HTTP GET/POST access to a local @@ -44,7 +45,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class ResourceFileLocationDBServlet extends ServletRequestContext { +public class ResourceFileLocationDBServlet extends AbstractRequestHandler { protected static final String OPERATION_ARGUMENT = "operation"; protected static final String NAME_ARGUMENT = "name"; @@ -77,18 +78,22 @@ e.printStackTrace(); httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage()); + } catch(BadQueryException e) { + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, + e.getMessage()); } return true; } private String handleOperation(Map<String,String[]> queryMap) - throws ParseException { + throws ParseException, BadQueryException { - String operation = getRequiredMapParam(queryMap, OPERATION_ARGUMENT); + String operation = AbstractRequestHandler.getRequiredMapParam(queryMap, OPERATION_ARGUMENT); String message; try { if (operation.equals(LOOKUP_OPERATION)) { - String name = getRequiredMapParam(queryMap, NAME_ARGUMENT); + String name = AbstractRequestHandler.getRequiredMapParam(queryMap, NAME_ARGUMENT); message = NO_LOCATION_PREFIX + " " + name; String arcUrls[] = locationDB.nameToUrls(name); @@ -107,8 +112,8 @@ } else if 
(operation.equals(GETRANGE_OPERATION)) { - long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT)); - long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT)); + long start = Long.parseLong(AbstractRequestHandler.getRequiredMapParam(queryMap, START_ARGUMENT)); + long end = Long.parseLong(AbstractRequestHandler.getRequiredMapParam(queryMap, END_ARGUMENT)); Iterator<String> itr = locationDB.getNamesBetweenMarks(start,end); StringBuilder str = new StringBuilder(); str.append("OK "); @@ -120,8 +125,8 @@ } else { - String name = getRequiredMapParam(queryMap, NAME_ARGUMENT); - String url = getRequiredMapParam(queryMap, URL_ARGUMENT); + String name = AbstractRequestHandler.getRequiredMapParam(queryMap, NAME_ARGUMENT); + String url = AbstractRequestHandler.getRequiredMapParam(queryMap, URL_ARGUMENT); if (operation.equals(ADD_OPERATION)) { locationDB.addNameUrl(name, url); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -40,7 +40,6 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.RequestParser; import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.WaybackConstants; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; @@ -58,7 +57,8 @@ import org.archive.wayback.exception.WaybackException; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.operator.BooleanOperator; -import 
org.springframework.beans.factory.BeanNameAware; +import org.archive.wayback.util.webapp.AbstractRequestHandler; +import org.archive.wayback.util.webapp.ShutdownListener; /** * Retains all information about a particular Wayback configuration @@ -77,265 +77,38 @@ * @author brad * @version $Date$, $Revision$ */ -public class AccessPoint implements RequestContext, BeanNameAware { +public class AccessPoint extends AbstractRequestHandler +implements ShutdownListener { private static final Logger LOGGER = Logger.getLogger( AccessPoint.class.getName()); - private String liveWebPrefix = null; - - private boolean useServerName = false; - private boolean useAnchorWindow = false; - private boolean exactSchemeMatch = true; private boolean exactHostMatch = false; + private boolean exactSchemeMatch = true; + private boolean useAnchorWindow = false; + private boolean useServerName = false; - private int contextPort = 0; - private String contextName = null; - private String beanName = null; - private WaybackCollection collection = null; - private ReplayDispatcher replay = null; - private ExceptionRenderer exception = new BaseExceptionRenderer(); - private QueryRenderer query = null; - private RequestParser parser = null; - private ResultURIConverter uriConverter = null; - private Properties configs = null; - private ExclusionFilterFactory exclusionFactory = null; - private BooleanOperator<WaybackRequest> authentication = null; + private String liveWebPrefix = null; private String urlRoot = null; + private Locale locale = null; + + private Properties configs = null; + private List<String> filePatterns = null; private List<String> filePrefixes = null; - - /** - * @return List of file patterns that will be matched when querying the - * ResourceIndex - */ - public List<String> getFilePatterns() { - return filePatterns; - } - /** - * @param filePatterns List of file Patterns (regular expressions) that - * will be matched when querying the ResourceIndex - only SearchResults - * matching 
one of these patterns will be returned. - */ - public void setFilePatterns(List<String> filePatterns) { - this.filePatterns = filePatterns; - } + private WaybackCollection collection = null; + private ExceptionRenderer exception = new BaseExceptionRenderer(); + private QueryRenderer query = null; + private RequestParser parser = null; + private ReplayDispatcher replay = null; + private ResultURIConverter uriConverter = null; - /** - * @return List of file String prefixes that will be matched when querying - * the ResourceIndex - */ - public List<String> getFilePrefixes() { - return filePrefixes; - } - - /** - * @param filePrefixes List of String file prefixes that will be matched - * when querying the ResourceIndex - only SearchResults from files - * with a prefix matching one of those in this List will be returned. - */ - public void setFilePrefixes(List<String> filePrefixes) { - this.filePrefixes = filePrefixes; - } - - /** - * @return the contextName - */ - public String getContextName() { - return contextName; - } - - /** - * @return the replay - */ - public ReplayDispatcher getReplay() { - return replay; - } - - /** - * @return the query - */ - public QueryRenderer getQuery() { - return query; - } - - /** - * @return the parser - */ - public RequestParser getParser() { - return parser; - } - - /** - * @return the uriConverter - */ - public ResultURIConverter getUriConverter() { - return uriConverter; - } - - /** - * @return explicit Locale to use within this AccessPoint. - */ - public Locale getLocale() { - return locale; - } - - /** - * @param locale explicit Locale to use for requests within this - * AccessPoint. 
If not set, will attempt to use the one specified by - * each requests User Agent via HTTP headers - */ - public void setLocale(Locale locale) { - this.locale = locale; - } - - /** - * - */ - public AccessPoint() { - - } + private ExclusionFilterFactory exclusionFactory = null; + private BooleanOperator<WaybackRequest> authentication = null; - /* (non-Javadoc) - * @see org.springframework.beans.factory.BeanNameAware#setBeanName(java.lang.String) - */ - public void setBeanName(String beanName) { - this.beanName = beanName; - this.contextName = ""; - int idx = beanName.indexOf(":"); - if(idx > -1) { - contextPort = Integer.valueOf(beanName.substring(0,idx)); - contextName = beanName.substring(idx + 1); - } else { - try { - this.contextPort = Integer.valueOf(beanName); - } catch(NumberFormatException e) { - e.printStackTrace(); - } - } - } - /** - * @return the name of the bean in the Spring configuration which defined - * this AccessPoint. - */ - public String getBeanName() { - return beanName; - } - /** - * @param httpRequest HttpServletRequest which is being handled - * @return the prefix of paths received by this server that are handled by - * this WaybackContext, including the trailing '/' - */ - public String getContextPath(HttpServletRequest httpRequest) { - String httpContextPath = httpRequest.getContextPath(); - if(contextName.length() == 0) { - return httpContextPath + "/"; - } - return httpContextPath + "/" + contextName + "/"; - } - /** - * Remove any leading ServletContext and AccessPoint name path elements - * from the incoming request path, returning the result as a String - * - * @param httpRequest HttpServletRequest which is being handled - * @param includeQuery if true, include any query arguments - * @return the portion of the request following the path to this context - * without leading '/' - */ - protected String translateRequest(HttpServletRequest httpRequest, - boolean includeQuery) { - - String origRequestPath = httpRequest.getRequestURI(); - 
if(includeQuery) { - String queryString = httpRequest.getQueryString(); - if (queryString != null) { - origRequestPath += "?" + queryString; - } - } - String contextPath = getContextPath(httpRequest); - if (!origRequestPath.startsWith(contextPath)) { - if(contextPath.startsWith(origRequestPath)) { - // missing trailing '/', just omit: - return ""; - } - return null; - } - return origRequestPath.substring(contextPath.length()); - } - - /** - * Remove any leading ServletContext and AccessPoint name path elements - * from the incoming request path, returning the result as a String - - * @param httpRequest HttpServletRequest which is being handled - * @return the portion of the request following the path to this context, - * including any query information,without leading '/' - */ - public String translateRequestPathQuery(HttpServletRequest httpRequest) { - return translateRequest(httpRequest,true); - } - - /** - * @param httpRequest HttpServletRequest which is being handled - * @return the portion of the request following the path to this context, - * excluding any query information, without leading '/' - */ - public String translateRequestPath(HttpServletRequest httpRequest) { - return translateRequest(httpRequest,false); - } - - /** - * Construct an absolute URL that points to the root of the context that - * received the request, including a trailing "/". - * - * @return String absolute URL pointing to the Context root where the - * request was received. 
- */ - private String getAbsoluteContextPrefix(HttpServletRequest httpRequest, - boolean useRequestServer) { - - StringBuilder prefix = new StringBuilder(); - prefix.append(WaybackConstants.HTTP_URL_PREFIX); - String waybackPort = null; - if(useRequestServer) { - prefix.append(httpRequest.getLocalName()); - waybackPort = String.valueOf(httpRequest.getLocalPort()); - } else { - prefix.append(httpRequest.getServerName()); - waybackPort = String.valueOf(httpRequest.getServerPort()); - } - if (!waybackPort.equals(WaybackConstants.HTTP_DEFAULT_PORT)) { - prefix.append(":").append(waybackPort); - } - String contextPath = getContextPath(httpRequest); - prefix.append(contextPath); - return prefix.toString(); - } - - /** - * @param httpRequest HttpServletRequest which is being handled - * @return absolute URL pointing to the base of this WaybackContext, using - * Server and port information from the HttpServletRequest argument. - */ - public String getAbsoluteServerPrefix(HttpServletRequest httpRequest) { - return getAbsoluteContextPrefix(httpRequest, true); - } - - /** - * @param httpRequest HttpServletRequest which is being handled - * @return absolute URL pointing to the base of this WaybackContext, using - * Canonical server and port information. 
- */ - public String getAbsoluteLocalPrefix(HttpServletRequest httpRequest) { - if(urlRoot != null) { - return urlRoot; - } - return getAbsoluteContextPrefix(httpRequest, useServerName); - } - protected boolean dispatchLocal(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { @@ -343,7 +116,7 @@ String translated = "/" + translateRequestPathQuery(httpRequest); WaybackRequest wbRequest = new WaybackRequest(); - wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + wbRequest.setContextPrefix(getUrlRoot()); wbRequest.setAccessPoint(this); wbRequest.fixup(httpRequest); UIResults uiResults = new UIResults(wbRequest,uriConverter); @@ -355,7 +128,7 @@ } return false; } - + /** * @param httpRequest HttpServletRequest which is being handled * @param httpResponse HttpServletResponse which is being handled @@ -371,25 +144,28 @@ boolean handled = false; try { - wbRequest = parser.parse(httpRequest, this); + wbRequest = getParser().parse(httpRequest, this); if(wbRequest != null) { handled = true; // TODO: refactor this code into RequestParser implementations wbRequest.setAccessPoint(this); - wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); +// wbRequest.setContextPrefix(getAbsoluteLocalPrefix(httpRequest)); + wbRequest.setContextPrefix(getUrlRoot()); wbRequest.fixup(httpRequest); // end of refactor - if(authentication != null) { - if(!authentication.isTrue(wbRequest)) { - throw new AuthenticationControlException("Not authorized"); + if(getAuthentication() != null) { + if(!getAuthentication().isTrue(wbRequest)) { + throw new AuthenticationControlException( + "Unauthorized"); } } - if(exclusionFactory != null) { - ExclusionFilter exclusionFilter = exclusionFactory.get(); + if(getExclusionFactory() != null) { + ExclusionFilter exclusionFilter = + getExclusionFactory().get(); if(exclusionFilter == null) { throw new AdministrativeAccessControlException( "AccessControl list unavailable"); @@ -399,8 
+175,8 @@ // TODO: refactor this into RequestParser implementations, so a // user could alter requests to change the behavior within a // single AccessPoint. For now, this is a simple way to expose - // the feature to configuration. - wbRequest.setExactScheme(exactSchemeMatch); + // the feature to configuration. + wbRequest.setExactScheme(isExactSchemeMatch()); if(wbRequest.isReplayRequest()) { @@ -408,7 +184,7 @@ } else { - wbRequest.setExactHost(exactHostMatch); + wbRequest.setExactHost(isExactHostMatch()); handleQuery(wbRequest,httpRequest,httpResponse); } } else { @@ -422,20 +198,34 @@ } catch(WaybackException e) { boolean drawError = true; if(e instanceof ResourceNotInArchiveException) { - if(liveWebPrefix != null) { - String liveUrl = liveWebPrefix + wbRequest.getRequestUrl(); + if(getLiveWebPrefix() != null) { + String liveUrl = + getLiveWebPrefix() + wbRequest.getRequestUrl(); httpResponse.sendRedirect(liveUrl); drawError = false; } } if(drawError) { logNotInArchive(e,wbRequest); - exception.renderException(httpRequest, httpResponse, wbRequest, e, - uriConverter); + getException().renderException(httpRequest, httpResponse, + wbRequest, e, getUriConverter()); } } return handled; } + + private void logNotInArchive(WaybackException e, WaybackRequest r) { + // TODO: move this into ResourceNotInArchiveException constructor + if(e instanceof ResourceNotInArchiveException) { + String url = r.getRequestUrl(); + StringBuilder sb = new StringBuilder(100); + sb.append("NotInArchive\t"); + sb.append(getUrlRoot()).append("\t"); + sb.append(url); + + LOGGER.info(sb.toString()); + } + } private void handleReplay(WaybackRequest wbRequest, HttpServletRequest httpRequest, HttpServletResponse httpResponse) @@ -443,24 +233,31 @@ Resource resource = null; try { PerformanceLogger p = new PerformanceLogger("replay"); - SearchResults results = collection.getResourceIndex().query(wbRequest); + SearchResults results = + getCollection().getResourceIndex().query(wbRequest); 
p.queried(); if(!(results instanceof CaptureSearchResults)) { throw new ResourceNotAvailableException("Bad results..."); } - CaptureSearchResults captureResults = (CaptureSearchResults) results; + CaptureSearchResults captureResults = + (CaptureSearchResults) results; // TODO: check which versions are actually accessible right now? CaptureSearchResult closest = captureResults.getClosest(wbRequest, - useAnchorWindow); + isUseAnchorWindow()); closest.setClosest(true); - resource = collection.getResourceStore().retrieveResource(closest); + resource = + getCollection().getResourceStore().retrieveResource(closest); p.retrieved(); - ReplayRenderer renderer = replay.getRenderer(wbRequest, closest, resource); + ReplayRenderer renderer = + getReplay().getRenderer(wbRequest, closest, resource); + renderer.renderResource(httpRequest, httpResponse, wbRequest, - closest, resource, uriConverter, captureResults); + closest, resource, getUriConverter(), captureResults); + p.rendered(); - p.write(wbRequest.getReplayTimestamp() + " " + wbRequest.getRequestUrl()); + p.write(wbRequest.getReplayTimestamp() + " " + + wbRequest.getRequestUrl()); } finally { if(resource != null) { resource.close(); @@ -473,18 +270,19 @@ throws ServletException, IOException, WaybackException { PerformanceLogger p = new PerformanceLogger("query"); - SearchResults results = collection.getResourceIndex().query(wbRequest); + SearchResults results = + getCollection().getResourceIndex().query(wbRequest); p.queried(); if(results instanceof CaptureSearchResults) { CaptureSearchResults cResults = (CaptureSearchResults) results; cResults.markClosest(wbRequest); - query.renderCaptureResults(httpRequest,httpResponse,wbRequest, - cResults,uriConverter); + getQuery().renderCaptureResults(httpRequest,httpResponse,wbRequest, + cResults,getUriConverter()); } else if(results instanceof UrlSearchResults) { UrlSearchResults uResults = (UrlSearchResults) results; - query.renderUrlResults(httpRequest,httpResponse,wbRequest, - 
uResults,uriConverter); + getQuery().renderUrlResults(httpRequest,httpResponse,wbRequest, + uResults,getUriConverter()); } else { throw new WaybackException("Unknown index format"); } @@ -492,172 +290,199 @@ p.write(wbRequest.getRequestUrl()); } + /** * Release any resources associated with this AccessPoint, including * stopping any background processing threads - * - * @throws IOException per usual */ - public void shutdown() throws IOException { + public void shutdown() { if(collection != null) { - collection.shutdown(); + try { + collection.shutdown(); + } catch (IOException e) { + LOGGER.error("FAILED collection shutdown", e); + } } if(exclusionFactory != null) { exclusionFactory.shutdown(); } } - private void logNotInArchive(WaybackException e, WaybackRequest r) { - // TODO: move this into ResourceNotInArchiveException constructor - if(e instanceof ResourceNotInArchiveException) { - String url = r.getRequestUrl(); - StringBuilder sb = new StringBuilder(100); - sb.append("NotInArchive\t"); - sb.append(contextName).append("\t"); - sb.append(contextPort).append("\t"); - sb.append(url); - - LOGGER.info(sb.toString()); - } + /* + * ******************************************************************* + * ******************************************************************* + * + * ALL GETTER/SETTER BELOW HERE + * + * ******************************************************************* + * ******************************************************************* + */ + + /** + * @return the exactHostMatch + */ + public boolean isExactHostMatch() { + return exactHostMatch; } /** - * @param contextPort the contextPort to set + * @param exactHostMatch if true, then only SearchResults exactly matching + * the requested hostname will be returned from this AccessPoint. If + * false, then hosts which canonicalize to the same host as requested + * hostname will be returned (www.) 
*/ - public void setContextPort(int contextPort) { - this.contextPort = contextPort; + public void setExactHostMatch(boolean exactHostMatch) { + this.exactHostMatch = exactHostMatch; } /** - * @param contextName the contextName to set + * @return the exactSchemeMatch */ - public void setContextName(String contextName) { - this.contextName = contextName; + public boolean isExactSchemeMatch() { + return exactSchemeMatch; } /** - * @param replay the replay to set + * @param exactSchemeMatch the exactSchemeMatch to set */ - public void setReplay(ReplayDispatcher replay) { - this.replay = replay; + public void setExactSchemeMatch(boolean exactSchemeMatch) { + this.exactSchemeMatch = exactSchemeMatch; } /** - * @param query the query to set + * @return true if this AccessPoint is configured to useAnchorWindow, that + * is, to replay documents only if they are within a certain proximity to + * the user's requested AnchorDate */ - public void setQuery(QueryRenderer query) { - this.query = query; + public boolean isUseAnchorWindow() { + return useAnchorWindow; } /** - * @param parser the parser to set + * @param useAnchorWindow when set to true, causes this AccessPoint to + * only replay documents if they are within a certain proximity to + * the user's requested AnchorDate */ - public void setParser(RequestParser parser) { - this.parser = parser; + public void setUseAnchorWindow(boolean useAnchorWindow) { + this.useAnchorWindow = useAnchorWindow; } /** - * @param uriConverter the uriConverter to set + * @return the useServerName + * @deprecated no longer used, use urlPrefix */ - public void setUriConverter(ResultURIConverter uriConverter) { - this.uriConverter = uriConverter; + public boolean isUseServerName() { + return useServerName; } - /** - * @return the contextPort + * @param useServerName the useServerName to set + * @deprecated no longer used, use urlPrefix */ - public int getContextPort() { - return contextPort; + public void setUseServerName(boolean 
useServerName) { + this.useServerName = useServerName; } /** - * @return the configs + * @return the liveWebPrefix String to use, or null, if this AccessPoint + * does not use the Live Web to fill in documents missing from the archive */ - public Properties getConfigs() { - return configs; + public String getLiveWebPrefix() { + return liveWebPrefix; } /** - * @param configs the configs to set + * @param liveWebPrefix the String URL prefix to use to attempt to retrieve + * documents missing from the collection from the live web, on demand. */ - public void setConfigs(Properties configs) { - this.configs = configs; + public void setLiveWebPrefix(String liveWebPrefix) { + this.liveWebPrefix = liveWebPrefix; } /** - * @return the useServerName + * @return the String url prefix to use when generating self referencing + * URLs */ - public boolean isUseServerName() { - return useServerName; + public String getUrlRoot() { + return urlRoot; } /** - * @param useServerName the useServerName to set + * @param urlRoot explicit URL prefix to use when creating self referencing + * URLs */ - public void setUseServerName(boolean useServerName) { - this.useServerName = useServerName; + public void setUrlRoot(String urlRoot) { + this.urlRoot = urlRoot; } /** - * @return the useAnchorWindow + * @return explicit Locale to use within this AccessPoint. */ - public boolean isUseAnchorWindow() { - return useAnchorWindow; + public Locale getLocale() { + return locale; } /** - * @param useAnchorWindow the useAnchorWindow to set + * @param locale explicit Locale to use for requests within this + * AccessPoint. 
If not set, will attempt to use the one specified by + * each request's User Agent via HTTP headers */ - public void setUseAnchorWindow(boolean useAnchorWindow) { - this.useAnchorWindow = useAnchorWindow; + public void setLocale(Locale locale) { + this.locale = locale; } - + /** - * @return the exactSchemeMatch + * @return the generic customization Properties used with this AccessPoint, + * generally to tune the UI */ - public boolean isExactSchemeMatch() { - return exactSchemeMatch; + public Properties getConfigs() { + return configs; } /** - * @param exactSchemeMatch the exactSchemeMatch to set + * @param configs the generic customization Properties to use with this + * AccessPoint, generally used to tune the UI */ - public void setExactSchemeMatch(boolean exactSchemeMatch) { - this.exactSchemeMatch = exactSchemeMatch; + public void setConfigs(Properties configs) { + this.configs = configs; } /** - * @return the ExclusionFilterFactory in use with this AccessPoint + * @return List of file patterns that will be matched when querying the + * ResourceIndex */ - public ExclusionFilterFactory getExclusionFactory() { - return exclusionFactory; + public List<String> getFilePatterns() { + return filePatterns; } /** - * @param exclusionFactory all requests to this AccessPoint will create an - * exclusionFilter from this factory when handling requests + * @param filePatterns List of file Patterns (regular expressions) that + * will be matched when querying the ResourceIndex - only SearchResults + * matching one of these patterns will be returned. */ - public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { - this.exclusionFactory = exclusionFactory; + public void setFilePatterns(List<String> filePatterns) { + this.filePatterns = filePatterns; } /** - * @return the configured AuthenticationControl operator in use with this - * AccessPoint. 
+ * @return List of file String prefixes that will be matched when querying + * the ResourceIndex */ - public BooleanOperator<WaybackRequest> getAuthentication() { - return authentication; + public List<String> getFilePrefixes() { + return filePrefixes; } /** - * @param authentication the BooleanOperator which determines if incoming - * requests are allowed to connect to this AccessPoint. + * @param filePrefixes List of String file prefixes that will be matched + * when querying the ResourceIndex - only SearchResults from files + * with a prefix matching one of those in this List will be returned. */ - public void setAuthentication(BooleanOperator<WaybackRequest> authentication) { - this.authentication = authentication; + public void setFilePrefixes(List<String> filePrefixes) { + this.filePrefixes = filePrefixes; } + + /** * @return the WaybackCollection used by this AccessPoint */ @@ -687,49 +512,97 @@ } /** - * @return the String url prefix to use when generating self referencing - * URLs + * @return the QueryRenderer to use with this AccessPoint */ - public String getUrlRoot() { - return urlRoot; + public QueryRenderer getQuery() { + return query; } + + /** + * @param query the QueryRenderer responsible for returning query data to + * clients. 
+ */ + public void setQuery(QueryRenderer query) { + this.query = query; + } /** - * @param urlRoot explicit URL prefix to use when creating self referencing - * URLs + * @return the RequestParser used by this AccessPoint to attempt to + * translate incoming HttpServletRequest objects into WaybackRequest + * objects */ - public void setUrlRoot(String urlRoot) { - this.urlRoot = urlRoot; + public RequestParser getParser() { + return parser; } + + /** + * @param parser the RequestParser to use with this AccessPoint + */ + public void setParser(RequestParser parser) { + this.parser = parser; + } /** - * @return the exactHostMatch + * @return the ReplayDispatcher to use with this AccessPoint, responsible + * for returning an appropriate ReplayRenderer given the user request and + * the returned document type. */ - public boolean isExactHostMatch() { - return exactHostMatch; + public ReplayDispatcher getReplay() { + return replay; } /** - * @param exactHostMatch if true, then only SearchResults exactly matching - * the requested hostname will be returned from this AccessPoint. If - * false, then hosts which canonicalize to the same host as requested - * hostname will be returned (www.) + * @param replay the ReplayDispatcher to use with this AccessPoint. 
*/ - public void setExactHostMatch(boolean exactHostMatch) { - this.exactHostMatch = exactHostMatch; + public void setReplay(ReplayDispatcher replay) { + this.replay = replay; } /** - * @return the liveWebPrefix + * @return the ResultURIConverter used to construct Replay URLs within this + * AccessPoint */ - public String getLiveWebPrefix() { - return liveWebPrefix; + public ResultURIConverter getUriConverter() { + return uriConverter; } /** - * @param liveWebPrefix the liveWebPrefix to set + * @param uriConverter the ResultURIConverter to use with this AccessPoint + * to construct Replay URLs */ - public void setLiveWebPrefix(String liveWebPrefix) { - this.liveWebPrefix = liveWebPrefix; + public void setUriConverter(ResultURIConverter uriConverter) { + this.uriConverter = uriConverter; } + + + /** + * @return the ExclusionFilterFactory in use with this AccessPoint + */ + public ExclusionFilterFactory getExclusionFactory() { + return exclusionFactory; + } + + /** + * @param exclusionFactory all requests to this AccessPoint will create an + * exclusionFilter from this factory when handling requests + */ + public void setExclusionFactory(ExclusionFilterFactory exclusionFactory) { + this.exclusionFactory = exclusionFactory; + } + + /** + * @return the configured AuthenticationControl BooleanOperator in use with + * this AccessPoint. + */ + public BooleanOperator<WaybackRequest> getAuthentication() { + return authentication; + } + + /** + * @param auth the BooleanOperator which determines if incoming + * requests are allowed to connect to this AccessPoint. 
+ */ + public void setAuthentication(BooleanOperator<WaybackRequest> auth) { + this.authentication = auth; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2010-04-27 22:45:40 UTC (rev 3080) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2010-04-27 22:47:37 UTC (rev 3081) @@ -45,84 +45,34 @@ import org.archive.wayback.liveweb.LiveWebCache; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.resourcestore.resourcefile.ArcResource; -import org.springframework.beans.factory.BeanNameAware; +import org.archive.wayback.util.webapp.AbstractRequestHandler; /** * @author brad * - * AccessPoint subclass which allows no Queries, but makes all replay requests - * through a LiveWebCache + * RequestHandler which satisfies all incoming requests through a LiveWebCache, + * using an internal AccessPoint to rewrite replayed documents. 
* */ -public class LiveWebAccessPoint extends ServletRequestContext implements BeanNameAware { +public class LiveWebAccessPoint extends AbstractRequestHandler { private AccessPoint inner = null; private LiveWebCache cache = null; private RobotExclusionFilterFactory robotFactory = null; private long maxCacheMS = 86400000; - private String beanName = null; - private int contextPort = 0; - private String contextName = null; - public void setBeanName(String beanName) { - this.beanName = beanName; - this.contextName = ""; - int idx = beanName.indexOf(":"); - if(idx > -1) { - contextPort = Integer.valueOf(beanName.substring(0,idx)); - contextName = beanName.substring(idx + 1); - } else { - try { - this.contextPort = Integer.valueOf(beanName); - } catch(NumberFormatException e) { - e.printStackTrace(); - } - } - } - /** - * @param httpRequest HttpServletRequest which is being handled - * @return the prefix of paths received by this server that are handled by - * this WaybackContext, including the trailing '/' - */ - public String getContextPath(HttpServletRequest httpRequest) { - String httpContextPath = httpRequest.getContextPath(); - if(contextName.length() == 0) { - return httpContextPath + "/"; - } - return httpContextPath + "/" + contextName + "/"; - } - - - protected String translateRequest(HttpServletRequest httpRequest, - boolean includeQuery) { - - String origRequestPath = httpRequest.getRequestURI(); - if(includeQuery) { - String queryString = httpRequest.getQueryString(); - if (queryString != null) { - origRequestPath += "?" 
+ queryString; - } - } - String contextPath = getContextPath(httpRequest); - if (!origRequestPath.startsWith(contextPath)) { - if(contextPath.startsWith(origRequestPath)) { - // missing trailing '/', just omit: - return ""; - } - return null; - } - return origRequestPath.substring(contextPath.length()); - } - public boolean handleRequest(HttpServletRequest httpRequest, HttpServletResponse httpResponse) throws ServletException, IOException { - String urlString = translateRequest(httpRequest,true); + String urlString = translateRequestPathQuery(httpRequest); + boolean handled = true; WaybackRequest wbRequest = new WaybackRequest(); wbRequest.setAccessPoint(inner); - wbRequest.setContextPrefix(inner.getAbsoluteServerPrefix(httpRequest)); - wbRequest.setServerPrefix(inner.getAbsoluteServerPrefix(httpRequest)); + + wbRequest.setContextPrefix(inner.getUrlRoot()); + wbRequest.setServerPrefix(inner.getUrlRoot()); + wbRequest.setLiveWebRequest(true); wbRequest.setRequestUrl(urlString); URL url = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |