You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
From: <bra...@us...> - 2010-04-24 00:17:50
|
Revision: 3055 http://archive-access.svn.sourceforge.net/archive-access/?rev=3055&view=rev Author: bradtofel Date: 2010-04-24 00:17:43 +0000 (Sat, 24 Apr 2010) Log Message: ----------- INITIAL REV: Library to generate simple bar graphs, intended to be easily encoded as an opaque string and used as an IMG src in an HTML page. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/Graph.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphConfiguration.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphElement.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncoder.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncodingException.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RectangularGraphElement.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionData.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionGraphElement.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElement.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElements.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/Graph.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/Graph.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/Graph.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,116 @@ +/* Graph + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Graphics2D; + +/** + * @author brad + * + */ +public class Graph extends RectangularGraphElement { + + private RegionGraphElement regions[] = null; + private GraphConfiguration config = null; + + /** + * @param width the width of the graph + * @param height the height of the graph + * @param data the values to draw in the graph + * @param config the configuration to use when drawing the graph + */ + public Graph(int width, int height, RegionData data[], + GraphConfiguration config) { + super(0,0,width,height); + this.config = config; + + int totalValues = 0; + int maxValue = -1; + for(RegionData datum : data) { + int array[] = datum.getValues(); + totalValues += array.length; + for(int d : array) { + if(d > maxValue) { + maxValue = d; + } + } + } + int valuesSoFar = 0; + regions = new 
RegionGraphElement[data.length]; + for(int i = 0; i < data.length; i++) { + int vCount = data[i].getValues().length; + + int x = Graph.xlateX(width, totalValues, valuesSoFar); + int w = Graph.xlateX(width, totalValues, valuesSoFar + vCount) - x; + data[i].setMaxValue(maxValue); + regions[i] = new RegionGraphElement(x,0,w,height,data[i],config); + valuesSoFar += vCount; + } + } + + /** + * @return the RegionGraphElements for the graph + */ + public RegionGraphElement[] getRegions() { + return regions; + } + + public void draw(Graphics2D g2d) { + + // set up rendering hints: + config.setRenderingHints(g2d); + + // draw background: + g2d.setColor(config.backgroundColor); + g2d.fillRect(1, 1, width - 2, height - 2); + + for(RegionGraphElement region : regions) { + region.draw(g2d); + } + + // draw line below values: + int labelHeight = config.regionFontSize + (config.fontPadY * 2); + int valuesHeight = (height - labelHeight) + 1; + + g2d.setColor(config.regionBorderColor); + g2d.setStroke(config.regionBorderStroke); + g2d.drawLine(1, valuesHeight, width - 2, valuesHeight); + } + + static int xlateX(int w, int c, int i) { + if(i == 0) { + return 0; + } else if(i == c) { + return w; + } + float width = w; + float count = c; + float idx = i; + float x = (idx/count) * width; + return (int) x; + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/Graph.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphConfiguration.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphConfiguration.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphConfiguration.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,128 @@ +/* GraphConfiguration + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.BasicStroke; +import java.awt.Color; +import java.awt.Font; +import java.awt.Graphics2D; +import java.awt.RenderingHints; +import java.awt.Stroke; + +/** + * @author brad + * + */ +public class GraphConfiguration { + final static float dash1[] = {3.0f}; + final static BasicStroke dashed = new BasicStroke(1.0f, + BasicStroke.CAP_BUTT, + BasicStroke.JOIN_MITER, + 3.0f, dash1, 0.0f); + + /** + * Main background color for graphs + */ + public Color backgroundColor = Color.white; + + /** + * font size for Year/Month labels + */ + public int regionFontSize = 9; + /** + * font name for Year/Month labels + */ + public String regionFontName = Font.SANS_SERIF; + /** + * font style for Year/Month labels + */ + public int regionFontStyle = Font.PLAIN; + /** + * font Color for Year/Month labels + */ + public Color regionLabelColor = Color.black; + /** + * top/bottom font padding for Year/Month labels + */ + 
public int fontPadY = 2; + /** + * left font padding for Year/Month labels + */ + public int fontPadX = 4; + + /** + * color for Year/Month border lines + */ + public Color regionBorderColor = Color.darkGray; + /** + * Stroke for Year/Month border lines + */ + public Stroke regionBorderStroke = dashed; + + /** + * Background color for active/selected Year/Month + */ + public Color regionHighlightColor = Color.lightGray; + + /** + * color for non-active/selected graph values + */ + public Color valueColor = Color.blue; + /** + * color for active/selected graph values + */ + public Color valueHighlightColor = Color.green; + + /** + * Minimum pixel height for non-zero graph values + */ + public int valueMinHeight = 5; + + + private Font regionLabelFont = null; + /** + * @return the current Font to use for Month/Year labels, combination of + * regionFontStyle, regionFontSize, regionFontName + */ + public Font getRegionLabelFont() { + if(regionLabelFont == null) { + regionLabelFont = new Font(regionFontName, + regionFontStyle,regionFontSize); + } + return regionLabelFont; + } + /** + * Set whatever rendering hints are needed to properly draw the graph, i.e. + * AntiAliasing, etc. + * @param g2d The Graphics2D object on which the hints should be set. 
+ */ + public void setRenderingHints(Graphics2D g2d) { + g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, + RenderingHints.VALUE_ANTIALIAS_ON); + g2d.setRenderingHint(RenderingHints.KEY_TEXT_ANTIALIASING, + RenderingHints.VALUE_TEXT_ANTIALIAS_ON); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphConfiguration.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphElement.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphElement.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphElement.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,41 @@ +/* GraphElement + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Graphics2D; + +/** + * @author brad + * + */ +public interface GraphElement { + /** + * Draw the element onto a Graphics2D. + * + * @param g2d the Graphics2D onto which the element should be drawn. + */ + public void draw(Graphics2D g2d); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphElement.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncoder.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncoder.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncoder.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,172 @@ +/* GraphEncoder + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +/** + * @author brad + * + */ +public class GraphEncoder { + private static String DELIM = "_"; + private static String REGION_DELIM = ":"; + + /** + * convert a String-encoded graph into a usable Graph object, using + * default GraphConfiguration + * @param encodedGraph String encoded graph, as returned by getEncoded() + * @return a Graph, ready to use + * @throws GraphEncodingException if there were problems with the encoded + * data + */ + public static Graph decode(String encodedGraph) + throws GraphEncodingException { + return decode(encodedGraph, new GraphConfiguration()); + } + /** + * convert a String-encoded graph into a usable Graph object, using + * the provided GraphConfiguration. + * @param encodedGraph String encoded graph, as returned by getEncoded() + * @param config the GraphConfiguration to use + * @return a Graph, ready to use + * @throws GraphEncodingException if there were problems with the encoded + * data + */ + public static Graph decode(String encodedGraph, GraphConfiguration config) + throws GraphEncodingException { + // encoded = "800_35_REGIONDATA_REGIONDATA_REGIONDATA_REGIONDATA_..." 
+ String parts[] = encodedGraph.split(DELIM); + int numRegions = parts.length - 2; + if(parts.length < 1) { + throw new GraphEncodingException("No regions defined!"); + } + int width; + int height; + try { + width = Integer.parseInt(parts[0]); + } catch(NumberFormatException e) { + throw new GraphEncodingException("Bad integer width:" + parts[0]); + } + try { + height = Integer.parseInt(parts[1]); + } catch(NumberFormatException e) { + throw new GraphEncodingException("Bad integer width:" + parts[0]); + } + RegionData data[] = new RegionData[numRegions]; + for(int i = 0; i < numRegions; i++) { + // REGIONDATA = "2001:-1:0ab3f70023f902f" + // LABEL:ACTIVE_IDX:HEXDATA + String regionParts[] = parts[i + 2].split(REGION_DELIM); + if(regionParts.length != 3) { + throw new GraphEncodingException("Wrong number of parts in " + + parts[i+2]); + } + int highlightedValue = Integer.parseInt(regionParts[1]); + int values[] = decodeHex(regionParts[2]); + data[i] = new RegionData(regionParts[0], highlightedValue, values); + } + return new Graph(width, height, data, config); + } + + /** + * Convert a complete Graph into an opaque String that can later be + * re-assembled into a Graph object. Note that GraphConfiguration + * information is NOT encoded into the opaque String. + * @param g Graph to encode + * @return opaque String which can later be used with decode() + */ + public static String encode(Graph g) { + RegionGraphElement rge[] = g.getRegions(); + RegionData data[] = new RegionData[rge.length]; + for(int i = 0; i < data.length; i++) { + data[i] = rge[i].getData(); + } + return encode(g.width, g.height, data); + } + + /** + * Convert a Graph fields into an opaque String that can later be + * re-assembled into a Graph object. Note that GraphConfiguration + * information is NOT encoded into the opaque String. 
+ * @param width of the Graph + * @param height of the Graph + * @param data array of RegionData for the graph + * @return opaque String which can later be used with decode() + */ + public static String encode(int width, int height, RegionData data[]) { + StringBuilder sb = new StringBuilder(); + sb.append(width).append(DELIM); + sb.append(height); + boolean first = false; + for(RegionData datum : data) { + if(first) { + first = false; + } else { + sb.append(DELIM); + } + sb.append(datum.getLabel()).append(REGION_DELIM); + sb.append(datum.getHighlightedValue()).append(REGION_DELIM); + sb.append(encodeHex(datum.getValues())); + } + return sb.toString(); + } + + private static String encodeHex(int values[]) { + StringBuilder sb = new StringBuilder(values.length); + for(int value : values) { + if((value > 15) || (value < 0)){ + throw new IllegalArgumentException(); + } + sb.append(Integer.toHexString(value)); + } + return sb.toString(); + } + + private static int[] decodeHex(String hexString) { + int length = hexString.length(); + int values[] = new int[length]; + for(int i = 0; i < length; i++) { + char c = hexString.charAt(i); + if(c >= '0') { + if(c <= '9') { + values[i] = c - '0'; + } else { + if(c > 'f') { + throw new IllegalArgumentException(); + } else { + if(c >= 'a') { + values[i] = c - 'W'; + } else { + throw new IllegalArgumentException(); + } + } + } + } else { + throw new IllegalArgumentException(); + } + } + return values; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncoder.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncodingException.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncodingException.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncodingException.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,46 @@ +/* GraphEncodingException + * + * $Id$: + * + * Created on Apr 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +/** + * @author brad + * + */ +public class GraphEncodingException extends Exception { + + /** + * + */ + private static final long serialVersionUID = -998274644110299354L; + + /** + * @param string message context for the exception + */ + public GraphEncodingException(String string) { + super(string); + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphEncodingException.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphRenderer.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphRenderer.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,102 @@ +/* GraphRenderer + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Graphics2D; +import java.awt.Rectangle; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.io.OutputStream; + +import javax.imageio.ImageIO; + +/** + * @author brad + * + */ +public class GraphRenderer { + /** + * appropriate Content-Type HTTP header value for graph image content + * produced by render(OutputStream,Graph) + */ + public final static String RENDERED_IMAGE_MIME = "image/png"; + /** + * Create both an HTML AREA map and an HTML IMG for a graph, using provided + * href targets, and titles within the AREA map. 
+ * @param graph Graph to draw + * @param mapName name of HTML AREA map + * @param imgUrl URL to rendered Graph, see GraphEncoder.encode() + * @param targets URL href targets for the years in the Graph + * @param titles titles for the years in the graph. + * @return HTML String for the resulting AREA and IMG + */ + public static String renderHTML(Graph graph, String mapName, String imgUrl, + String targets[], String titles[]) { + + StringBuilder sb = new StringBuilder(); + sb.append("<map name=\"").append(mapName).append("\">"); + RegionGraphElement rge[] = graph.getRegions(); + int count = rge.length; + for(int i = 0; i < count; i++) { + if(targets[i] != null) { + Rectangle r = rge[i].getBoundingRectangle(); + sb.append("<area href=\"").append(targets[i]).append("\""); + if(titles[i] != null) { + sb.append(" title=\"").append(titles[i]).append("\""); + } + sb.append(" shape=\"rect\" coords=\""); + sb.append(r.x).append(","); + sb.append(r.y).append(","); + sb.append(r.x+r.width).append(","); + sb.append(r.y+r.height).append("\" border=\"1\" />"); + } + } + sb.append("</map>"); + sb.append("<image src=\"").append(imgUrl).append("\""); + sb.append(" border=\"0\" width=\"").append(graph.width).append("\""); + sb.append(" height=\"").append(graph.height).append("\""); + sb.append(" usemap=\"#").append(mapName).append("\" />"); + + return sb.toString(); + } + + /** + * Send a PNG format byte stream for the argument Graph to the provided + * OutputStream + * @param target OutputStream to write PNG format bytes + * @param graph Graph to send to the target + * @throws IOException for usual reasons. 
+ */ + public void render(OutputStream target, Graph graph) throws IOException { + + BufferedImage bi = + new BufferedImage(graph.width, graph.height, BufferedImage.TYPE_INT_RGB); + Graphics2D g2d = bi.createGraphics(); + graph.draw(g2d); + ImageIO.write(bi, "png", target); + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/GraphRenderer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RectangularGraphElement.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RectangularGraphElement.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RectangularGraphElement.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,76 @@ +/* RectangularGraphElement + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Rectangle; + +/** + * A GraphElement which allows interrogation of its bounding java.awt.Rectangle + * @author brad + * + */ +public abstract class RectangularGraphElement implements GraphElement { + protected int x = 0; + protected int y = 0; + protected int width = 0; + protected int height = 0; + + /** + * Construct a new RectangularGraphElement with the supplied values + * @param x the rectangle's x in the global coordinate space + * @param y the rectangle's y in the global coordinate space + * @param width the rectangle's width + * @param height the rectangle's height + */ + public RectangularGraphElement(int x, int y, int width, int height) { + this.x = x; + this.y = y; + this.width = width; + this.height = height; + } + + /** + * @return the width + */ + public int getWidth() { + return width; + } + + /** + * @return the height + */ + public int getHeight() { + return height; + } + + /** + * @return the java.awt.Rectangle which bounds this GraphElement, in the + * coordinate space of the Graph which contains it. 
+ */ + public Rectangle getBoundingRectangle() { + return new Rectangle(x, y, width, height); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RectangularGraphElement.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionData.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionData.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionData.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,95 @@ +/* RegionData + * + * $Id$: + * + * Created on Apr 14, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +/** + * Containing object for data associated with one region (month/year/etc) in the + * graph, including the: + * label + * highlighted value index + * int array of values to graph within this region + * the global max int value across all values in the overall graph + * @author brad + * + */ +public class RegionData { + private String label = null; + private int highlightedValue = -1; + private int values[] = null; + private int maxValue = -1; + /** + * @param label the text label to draw for this region + * @param highlightedValue the index of the value to "highlight" or -1, if + * no values should be highlighted. Note that highlighting a value in a + * region causes the entire region to get a background highlight, also + * @param values int array of raw values, each between 0 and 15. + */ + public RegionData(String label, int highlightedValue, int values[]) { + this.label = label; + this.highlightedValue = highlightedValue; + this.values = values; + } + /** + * @return the String label for this region + */ + public String getLabel() { + return label; + } + /** + * @return the index of the highlighted value in this region, or -1 if none + * are highlighted + */ + public int getHighlightedValue() { + return highlightedValue; + } + /** + * @return the raw array of values for this region + */ + public int[] getValues() { + return values; + } + /** + * @return the global graph maximum value, used for normalizing values to + * ensure the values use the entire Y axis. + */ + public int getMaxValue() { + return maxValue; + } + + /** + * @param maxValue the global graph maximum value, used for normalizing + * values to ensure the values use the entire Y axis. 
+ */ + public void setMaxValue(int maxValue) { + this.maxValue = maxValue; + } + /** + * @return true if one of the values in this region is highlighted + */ + public boolean hasHighlightedValue() { + return (highlightedValue != -1); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionData.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionGraphElement.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionGraphElement.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionGraphElement.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,87 @@ +/* RegionGraphElement + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Graphics2D; + +/** + * @author brad + * + */ +public class RegionGraphElement extends RectangularGraphElement { + private RegionData data = null; + private ValueGraphElements values = null; + private GraphConfiguration config = null; + + /** + * @param x the x coordinate for the region, in the global coordinate space + * of the containing Graph + * @param y the y coordinate for the region, in the global coordinate space + * of the containing Graph + * @param width the width of this region, in pixels + * @param height the height of this region, in pixels + * @param data the data to use for this region + * @param config the GraphConfiguration for this Graph + */ + public RegionGraphElement(int x, int y, int width, int height, + RegionData data, GraphConfiguration config) { + super(x,y,width,height); +// System.err.format("Created region (%d,%d)-(%d,%d)\n",x,y,width,height); + this.data = data; + this.config = config; + int labelHeight = config.regionFontSize + (config.fontPadY * 2); + int valuesHeight = height - labelHeight; + this.values = new ValueGraphElements(x+1, y+1, width - 1, valuesHeight, + data.getHighlightedValue(), data.getValues(), + data.getMaxValue(), config); + } + + /** + * @return the RegionData for this region + */ + public RegionData getData() { + return data; + } + + public void draw(Graphics2D g2d) { + + if(data.hasHighlightedValue()) { + g2d.setColor(config.regionHighlightColor); + g2d.fillRect(x + 1, y+1, width - 1, height-2); + } + + g2d.setColor(config.regionBorderColor); + g2d.setStroke(config.regionBorderStroke); + g2d.drawLine(x, y, x, y + height); + + int fontY = (y + height) - config.fontPadY; + + g2d.setColor(config.regionLabelColor); + 
g2d.setFont(config.getRegionLabelFont()); + g2d.drawString(data.getLabel(), x + config.fontPadX, fontY); + values.draw(g2d); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/RegionGraphElement.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElement.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElement.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElement.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,59 @@ +/* ValueGraphElement + * + * $Id$: + * + * Created on Apr 9, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Graphics2D; + +/** + * @author brad + * + */ +public class ValueGraphElement extends RectangularGraphElement { + boolean highlighted = false; + GraphConfiguration config = null; + /** + * @param x the rectangles x in the global coordinate space + * @param y the rectangles y in the global coordinate space + * @param width the rectangles width + * @param height the rectangles height + * @param highlighted true if this value is highlighted + * @param config reference to the configuration for the graph + */ + public ValueGraphElement(int x, int y, int width, int height, + boolean highlighted, GraphConfiguration config) { + super(x, y, width, height); + + this.highlighted = highlighted; + this.config = config; + } + + + public void draw(Graphics2D g2d) { + g2d.setColor(highlighted ? + config.valueHighlightColor : config.valueColor); + g2d.fillRect(x, y, width, height); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElement.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElements.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElements.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElements.java 2010-04-24 00:17:43 UTC (rev 3055) @@ -0,0 +1,109 @@ +/* ValueGraphElements + * + * $Id$: + * + * Created on Apr 9, 2010. 
+ * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.graph; + +import java.awt.Graphics2D; +import java.util.NoSuchElementException; + +/** + * @author brad + * + */ +public class ValueGraphElements extends RectangularGraphElement { + private int values[] = null; + private int highlightValue = -1; + private int maxValue = -1; + private GraphConfiguration config = null; + + /** + * @param x the rectangles x in the global coordinate space + * @param y the rectangles y in the global coordinate space + * @param width the rectangles width + * @param height the rectangles height + * @param hightlightValue the index of the value to highlight + * @param values array of int values, each must be 0-15 + * @param maxValue the global maximum value across all elements in the Graph + * @param config the configuration for the Graph + */ + public ValueGraphElements(int x, int y, int width, int height, + int hightlightValue, int values[], int maxValue, + GraphConfiguration config) { + + super(x, y, width, height); +// System.err.format("Created VGEs (%d,%d)-(%d,%d)\n",x,y,width,height); + + this.highlightValue = hightlightValue; + this.values = values; + this.config = config; + this.maxValue = maxValue; + } + + /** + * return the 
i'th ValueGraphElement + * @param i the index of the element to return + * @return the ValueGraphElement at index i + */ + public ValueGraphElement getElement(int i) { + if((i < 0) || (i >= values.length)) { + throw new NoSuchElementException(); + } + int minHeight = config.valueMinHeight; + + // normalize height to value between 0 and 1: + float value = ((float) values[i]) / ((float) maxValue); + float usableHeight = height - minHeight; + int valueHeight = (int) (usableHeight * value) + minHeight; + + int elX = Graph.xlateX(width, values.length, i); + int elW = Graph.xlateX(width, values.length, i+1) - elX; + int elY = height - valueHeight; + boolean hot = i == highlightValue; + return new ValueGraphElement(x + elX, y + elY, elW, valueHeight, hot, config); + } + + public void draw(Graphics2D g2d) { + for(int i = 0; i < values.length; i++) { + if(values[i] > 0) { + getElement(i).draw(g2d); + } + } + } + + /** + * @return the index of the highlighted value + */ + public int getHighlightValue() { + return highlightValue; + } + + /** + * @return the raw int values for the graph + */ + public int[] getRawValues() { + return values; + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/graph/ValueGraphElements.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3054 http://archive-access.svn.sourceforge.net/archive-access/?rev=3054&view=rev Author: bradtofel Date: 2010-04-23 23:39:12 +0000 (Fri, 23 Apr 2010) Log Message: ----------- FEATURE: added new filter to omit records with userinfo in the original URL Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java 2010-04-23 23:38:06 UTC (rev 3053) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filterfactory/CoreCaptureFilterGroup.java 2010-04-23 23:39:12 UTC (rev 3054) @@ -32,6 +32,7 @@ import org.archive.wayback.resourceindex.filters.ConditionalGetAnnotationFilter; import org.archive.wayback.resourceindex.filters.DuplicateRecordFilter; import org.archive.wayback.resourceindex.filters.GuardRailFilter; +import org.archive.wayback.resourceindex.filters.UserInfoInAuthorityFilter; import org.archive.wayback.resourceindex.filters.WARCRevisitAnnotationFilter; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.ObjectFilterChain; @@ -47,6 +48,7 @@ chain.addFilter(new WARCRevisitAnnotationFilter()); chain.addFilter(new ConditionalGetAnnotationFilter()); } + chain.addFilter(new UserInfoInAuthorityFilter()); } public List<ObjectFilter<CaptureSearchResult>> getFilters() { return chain.getFilters(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3053 http://archive-access.svn.sourceforge.net/archive-access/?rev=3053&view=rev Author: bradtofel Date: 2010-04-23 23:38:06 +0000 (Fri, 23 Apr 2010) Log Message: ----------- INITIAL REV: Filter to remove CaptureSearchResults if they contain userinfo in the original URL. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UserInfoInAuthorityFilter.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UserInfoInAuthorityFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UserInfoInAuthorityFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UserInfoInAuthorityFilter.java 2010-04-23 23:38:06 UTC (rev 3053) @@ -0,0 +1,59 @@ +/* UserInfoInAuthorityFilter + * + * $Id$: + * + * Created on Apr 16, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.resourceindex.filters; + +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.util.ObjectFilter; +import org.archive.wayback.util.url.UrlOperations; + +/** + * Class which omits CaptureSearchResults that have and '@' in the original URL + * field, if that '@' is after the scheme, and before the first '/' or ':' + * + * @author brad + * + */ +public class UserInfoInAuthorityFilter implements ObjectFilter<CaptureSearchResult> { + private boolean wantUserInfo = false; + public int filterObject(CaptureSearchResult o) { + boolean hasUserInfo = + (UrlOperations.urlToUserInfo(o.getOriginalUrl()) != null); + return hasUserInfo == wantUserInfo + ? ObjectFilter.FILTER_INCLUDE : ObjectFilter.FILTER_EXCLUDE; + } + /** + * @return the wantUserInfo + */ + public boolean isWantUserInfo() { + return wantUserInfo; + } + /** + * @param wantUserInfo the wantUserInfo to set + */ + public void setWantUserInfo(boolean wantUserInfo) { + this.wantUserInfo = wantUserInfo; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/UserInfoInAuthorityFilter.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-23 23:36:46
|
Revision: 3052 http://archive-access.svn.sourceforge.net/archive-access/?rev=3052&view=rev Author: bradtofel Date: 2010-04-23 23:36:40 +0000 (Fri, 23 Apr 2010) Log Message: ----------- FEATURE: added spacesToNBSP() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java 2010-04-23 23:35:12 UTC (rev 3051) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java 2010-04-23 23:36:40 UTC (rev 3052) @@ -204,4 +204,13 @@ public String escapeJavaScript(String raw) { return StringEscapeUtils.escapeJavaScript(raw); } + + /** + * Convert... spaces to &nbsp; + * @param input to replace + * @return with spaces replaced + */ + public String spaceToNBSP(String input) { + return input.replaceAll(" ", "&nbsp;"); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-23 23:35:19
|
Revision: 3051 http://archive-access.svn.sourceforge.net/archive-access/?rev=3051&view=rev Author: bradtofel Date: 2010-04-23 23:35:12 +0000 (Fri, 23 Apr 2010) Log Message: ----------- BUGFIX: fixed(hopefully) problem when extracting hostname from URLs containing userinfo FEATURE: added urlToUserInfo() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-04-15 00:23:54 UTC (rev 3050) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-04-23 23:35:12 UTC (rev 3051) @@ -24,6 +24,8 @@ */ package org.archive.wayback.util.url; +import java.net.MalformedURLException; +import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -97,6 +99,10 @@ private static final Pattern AUTHORITY_REGEX_SIMPLE = Pattern.compile("([0-9a-z_.-]++)"); + private static final Pattern HOST_REGEX_SIMPLE = + Pattern.compile("(?:[0-9a-z_.:-]+@)?([0-9a-z_.-]++)"); + private static final Pattern USERINFO_REGEX_SIMPLE = + Pattern.compile("([0-9a-z_.:-]+)(?:@[0-9a-z_.-]++)"); /** * @param urlPart @@ -184,23 +190,44 @@ return url.substring(pathIdx); } } - + public static String urlToHost(String url) { - if(url.startsWith("dns:")) { - return url.substring(4); + String lcUrl = url.toLowerCase(); + if(lcUrl.startsWith("dns:")) { + return lcUrl.substring(4); } for(String scheme : ALL_SCHEMES) { - if(url.startsWith(scheme)) { - int hostIdx = scheme.length(); + if(lcUrl.startsWith(scheme)) { + int 
authorityIdx = scheme.length(); - Matcher m = AUTHORITY_REGEX_SIMPLE.matcher(url.substring(hostIdx)); + Matcher m = + HOST_REGEX_SIMPLE.matcher(lcUrl.substring(authorityIdx)); if(m.find()) { - return m.group(0); + return m.group(1); } } } return url; } + + public static String urlToUserInfo(String url) { + String lcUrl = url.toLowerCase(); + if(lcUrl.startsWith("dns:")) { + return null; + } + for(String scheme : ALL_SCHEMES) { + if(lcUrl.startsWith(scheme)) { + int authorityIdx = scheme.length(); + + Matcher m = + USERINFO_REGEX_SIMPLE.matcher(lcUrl.substring(authorityIdx)); + if(m.find()) { + return m.group(1); + } + } + } + return null; + } /** * Find and return the parent directory of the URL argument Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-04-15 00:23:54 UTC (rev 3050) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-04-23 23:35:12 UTC (rev 3051) @@ -35,7 +35,14 @@ assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com")); assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com")); assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com")); - + + assertEquals("www.google.com",UrlOperations.urlToHost("http://www.GOOGLE.COM")); + assertEquals("google.com",UrlOperations.urlToHost("http://GOOGLE.COM/")); + assertEquals("google.com",UrlOperations.urlToHost("http://GOOGLE.COM")); + assertEquals("google.com",UrlOperations.urlToHost("http://GOOGLE.COM:80")); + assertEquals("google.com",UrlOperations.urlToHost("http://GOOGLE.COM:80/")); + assertEquals("google.com",UrlOperations.urlToHost("http://GOOGLE.COM:80/foo")); + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/")); 
assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/")); assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/")); @@ -66,8 +73,34 @@ assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com\\")); assertEquals("www.foo.com",UrlOperations.urlToHost("http://www.foo.com\\")); assertEquals("www.foo.com",UrlOperations.urlToHost("http://www.foo.com:80\\")); + + + assertEquals("foo.com",UrlOperations.urlToHost("http://us...@fo...")); + assertEquals("www.foo.com",UrlOperations.urlToHost("http://us...@ww...")); + assertEquals("www.foo.com",UrlOperations.urlToHost("http://user:pa...@ww...")); + + assertEquals("www.foo.com",UrlOperations.urlToHost("http://user:pa...@ww.../")); + assertEquals("www.foo.com",UrlOperations.urlToHost("http://user:pa...@ww.../boo@foo")); } + public void testUrlToUserInfo() { + assertEquals(null,UrlOperations.urlToUserInfo("dns:foo.com")); + assertEquals(null,UrlOperations.urlToUserInfo("http://foo.com")); + assertEquals(null,UrlOperations.urlToUserInfo("https://foo.com")); + assertEquals(null,UrlOperations.urlToUserInfo("ftp://foo.com")); + assertEquals(null,UrlOperations.urlToUserInfo("ftp://foo.com/")); + assertEquals(null,UrlOperations.urlToUserInfo("http://foo.com:80/")); + assertEquals(null,UrlOperations.urlToUserInfo("http://foo.com:80")); + assertEquals(null,UrlOperations.urlToUserInfo("http://www.foo.com:80\\")); + + assertEquals("user",UrlOperations.urlToUserInfo("http://us...@fo...")); + assertEquals("user",UrlOperations.urlToUserInfo("http://us...@ww...")); + assertEquals("user:pass",UrlOperations.urlToUserInfo("http://user:pa...@ww...")); + assertEquals("user:pass",UrlOperations.urlToUserInfo("http://user:pa...@ww...:8080")); + assertEquals("user:pass",UrlOperations.urlToUserInfo("http://user:pa...@ww...:8080/boo@arb")); + assertEquals("www.foo.com",UrlOperations.urlToHost("http://user:pa...@ww.../")); + assertEquals("www.foo.com",UrlOperations.urlToHost("http://user:pa...@ww.../boo@foo")); + } public void 
testResolveUrl() { for(String scheme : UrlOperations.ALL_SCHEMES) { @@ -83,9 +116,7 @@ assertEquals(scheme + "a.org/1/2", UrlOperations.resolveUrl(scheme + "a.org/3","1/2")); - } - } public void testUrlToScheme() { assertEquals("http://",UrlOperations.urlToScheme("http://a.com/")); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-15 00:24:00
|
Revision: 3050 http://archive-access.svn.sourceforge.net/archive-access/?rev=3050&view=rev Author: bradtofel Date: 2010-04-15 00:23:54 +0000 (Thu, 15 Apr 2010) Log Message: ----------- CONFIG: Set heritrix-commons version to 3.1.1 Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/pom.xml Modified: trunk/archive-access/projects/wayback/wayback-core/pom.xml =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/pom.xml 2010-04-14 23:22:41 UTC (rev 3049) +++ trunk/archive-access/projects/wayback/wayback-core/pom.xml 2010-04-15 00:23:54 UTC (rev 3050) @@ -54,22 +54,21 @@ <version>2.4</version> <scope>provided</scope> </dependency> - <!-- <dependency> <groupId>org.archive.heritrix</groupId> - <artifactId>commons</artifactId> - <version>3.0.0-SNAPSHOT</version> + <artifactId>heritrix-commons</artifactId> + <version>3.1.1-SNAPSHOT</version> </dependency> - --> <dependency> - <groupId>org.archive.heritrix</groupId> - <artifactId>commons</artifactId> - <version>2.0.3-SNAPSHOT</version> - </dependency> - <dependency> <groupId>org.archive.access-control</groupId> <artifactId>access-control</artifactId> <version>0.0.1-SNAPSHOT</version> + <exclusions> + <exclusion> + <groupId>commons-pool</groupId> + <artifactId>commons-pool</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.mozilla</groupId> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 23:22:48
|
Revision: 3049 http://archive-access.svn.sourceforge.net/archive-access/?rev=3049&view=rev Author: bradtofel Date: 2010-04-14 23:22:41 +0000 (Wed, 14 Apr 2010) Log Message: ----------- CONFIG: default logs to stdout Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/log4j.properties Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/log4j.properties =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/log4j.properties 2010-04-14 21:58:03 UTC (rev 3048) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/log4j.properties 2010-04-14 23:22:41 UTC (rev 3049) @@ -1,8 +1,11 @@ log4j.rootLogger=WARN, R -log4j.appender.R=org.apache.log4j.RollingFileAppender -log4j.appender.R.File=/tmp/wayback.log -log4j.appender.R.MaxFileSize=100MB -log4j.appender.R.MaxBackupIndex=10 +log4j.appender.R=org.apache.log4j.ConsoleAppender +log4j.appender.R.target=System.err +#log4j.appender.R=org.apache.log4j.RollingFileAppender +#log4j.appender.R.File=/tmp/wayback.log +#log4j.appender.R.MaxFileSize=100MB +#log4j.appender.R.MaxBackupIndex=10 log4j.appender.R.layout=org.apache.log4j.PatternLayout log4j.appender.R.layout.ConversionPattern=%d{ISO8601} %p %t %c - %m%n +log4j.logger.org.archive.wayback=WARN log4j.logger.org.archive.wayback.webapp.AccessPoint=INFO This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:58:15
|
Revision: 3048 http://archive-access.svn.sourceforge.net/archive-access/?rev=3048&view=rev Author: bradtofel Date: 2010-04-14 21:58:03 +0000 (Wed, 14 Apr 2010) Log Message: ----------- FEATURE: added English version of new Partition headers, and Graph titles Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties 2010-04-14 21:56:30 UTC (rev 3047) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/classes/WaybackUI.properties 2010-04-14 21:58:03 UTC (rev 3048) @@ -117,3 +117,25 @@ AdvancedSearch.searchTypeLabel=Show urls AdvancedSearch.searchTypeExactOption=exactly matching AdvancedSearch.searchTypePrefixOption=beginning with + + +Partition.columnSummary={0,choice,0#0 pages|1#1 page|1<{0,number,integer} pages} +PartitionSize.dateHeader.day={0,date,MMM d} +PartitionSize.dateHeader.hour={0,date,h a} +PartitionSize.dateHeader.month={0,date,MMM yyyy} +PartitionSize.dateHeader.twomonth={0,date,MMM yyyy} - {1,date,MMM yyyy} +PartitionSize.dateHeader.twoyear={0,date,MMM yyyy} - {1,date,MMM yyyy} +PartitionSize.dateHeader.week={0,date,MMM d} - {1,date,MMM d} +PartitionSize.dateHeader.year={0,date,yyyy} + +graph.title=Jump to first record in {0,date,yyyy} : ({0,date,MMM d}) +graph.prevYear= ‹‹‹‹Year +graph.prevMonth=‹‹‹Month +graph.prevDay=‹‹Day +graph.prevCapture=‹Prev +graph.current=Showing Date {1,date,MMM yyyy} +graph.nextCapture=Next› +graph.nextDay=Day›› +graph.nextMonth=Month››› +graph.nextYear=Year›››› + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:56:37
|
Revision: 3047 http://archive-access.svn.sourceforge.net/archive-access/?rev=3047&view=rev Author: bradtofel Date: 2010-04-14 21:56:30 +0000 (Wed, 14 Apr 2010) Log Message: ----------- REFACTOR: now uses new partitioning code, and HTML string escaping. Old implementation left as-was, for the moment, in OldCalendarResults.jsp for easy consulting/diffing with previous implementation Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/CalendarResults.jsp Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/OldCalendarResults.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/CalendarResults.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/CalendarResults.jsp 2010-04-14 21:50:33 UTC (rev 3046) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/CalendarResults.jsp 2010-04-14 21:56:30 UTC (rev 3047) @@ -9,8 +9,10 @@ <%@ page import="org.archive.wayback.core.CaptureSearchResults" %> <%@ page import="org.archive.wayback.core.UIResults" %> <%@ page import="org.archive.wayback.core.WaybackRequest" %> -<%@ page import="org.archive.wayback.query.resultspartitioner.ResultsPartitionsFactory" %> -<%@ page import="org.archive.wayback.query.resultspartitioner.ResultsPartition" %> +<%@ page import="org.archive.wayback.partition.CaptureSearchResultPartitionMap" %> +<%@ page import="org.archive.wayback.util.partition.Partition" %> +<%@ page import="org.archive.wayback.util.partition.Partitioner" %> +<%@ page import="org.archive.wayback.util.partition.PartitionSize" %> <%@ page import="org.archive.wayback.util.StringFormatter" %> <jsp:include page="/WEB-INF/template/UI-header.jsp" flush="true" /> <jsp:include page="/WEB-INF/template/CookieJS.jsp" flush="true" /> @@ -20,7 +22,7 @@ 
WaybackRequest wbRequest = results.getWbRequest(); CaptureSearchResults cResults = results.getCaptureResults(); StringFormatter fmt = wbRequest.getFormatter(); -String searchString = wbRequest.getRequestUrl(); +String searchString = fmt.escapeHtml(wbRequest.getRequestUrl()); List<String> closeMatches = cResults.getCloseMatches(); @@ -30,8 +32,16 @@ long lastResult = cResults.getReturnedCount() + firstResult; long resultCount = cResults.getMatchingCount(); -ArrayList<ResultsPartition> partitions = - ResultsPartitionsFactory.get(cResults, wbRequest); +CaptureSearchResultPartitionMap map = + new CaptureSearchResultPartitionMap(); +Partitioner<CaptureSearchResult> partitioner = + new Partitioner<CaptureSearchResult>(map); +PartitionSize size = partitioner.getSize(searchStartDate,searchEndDate,13); +List<Partition<CaptureSearchResult>> partitions = + partitioner.getRange(size,searchStartDate,searchEndDate); + +Iterator<CaptureSearchResult> it = cResults.iterator(); +partitioner.populate(partitions,it); int numPartitions = partitions.size(); %> <table border="0" cellpadding="5" width="100%" class="mainSearchBanner" cellspacing="0"> @@ -60,10 +70,10 @@ <tr bgcolor="#CCCCCC"> <% for(int i = 0; i < numPartitions; i++) { - ResultsPartition partition = partitions.get(i); + Partition<CaptureSearchResult> partition = partitions.get(i); %> <td align="center" class="mainBigBody"> - <%= partition.getTitle() %> + <%= fmt.format("PartitionSize.dateHeader."+size.name(),partition.getStart(), partition.getEnd()) %> </td> <% } @@ -77,10 +87,10 @@ <tr bgcolor="#CCCCCC"> <% for(int i = 0; i < numPartitions; i++) { - ResultsPartition partition = (ResultsPartition) partitions.get(i); + Partition<CaptureSearchResult> partition = partitions.get(i); %> <td align="center" class="mainBigBody"> - <%= fmt.format("ResultPartition.columnSummary",partition.resultsCount()) %> + <%= fmt.format("Partition.columnSummary",partition.count()) %> </td> <% } @@ -96,8 +106,8 @@ String lastMD5 = null; for(int i 
= 0; i < numPartitions; i++) { - ResultsPartition partition = (ResultsPartition) partitions.get(i); - ArrayList<CaptureSearchResult> partitionResults = partition.getMatches(); + Partition<CaptureSearchResult> partition = partitions.get(i); + List<CaptureSearchResult> partitionResults = partition.list(); %> <td nowrap class="mainBody" valign="top"> <% @@ -125,7 +135,7 @@ String arcFile = result.getFile(); String arcOffset = String.valueOf(result.getOffset()); - String replayUrl = results.resultToReplayUrl(result); + String replayUrl = fmt.escapeHtml(results.resultToReplayUrl(result)); boolean updated = false; if(lastMD5 == null) { @@ -164,8 +174,9 @@ <% for(String closeMatch : closeMatches) { tmp.setRequestUrl(closeMatch); - String link = tmp.getContextPrefix() + "query?" + - tmp.getQueryArguments(); + String link = fmt.escapeHtml(tmp.getContextPrefix() + "query?" + + tmp.getQueryArguments()); + closeMatch = fmt.escapeHtml(closeMatch); %> <a href="<%= link %>"><%= closeMatch %></a><br> <% @@ -184,7 +195,7 @@ <% } else { %> - <a href="<%= results.urlForPage(i) %>"><%= i %></a> + <a href="<%= fmt.escapeHtml(results.urlForPage(i)) %>"><%= i %></a> <% } } Added: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/OldCalendarResults.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/OldCalendarResults.jsp (rev 0) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/OldCalendarResults.jsp 2010-04-14 21:56:30 UTC (rev 3047) @@ -0,0 +1,193 @@ +<%@ page language="java" pageEncoding="utf-8" contentType="text/html;charset=utf-8"%> +<%@ page import="java.util.List" %> +<%@ page import="java.util.ArrayList" %> +<%@ page import="java.util.Date" %> +<%@ page import="java.util.Iterator" %> +<%@ page import="java.text.ParseException" %> +<%@ page import="org.archive.wayback.WaybackConstants" %> +<%@ page 
import="org.archive.wayback.core.CaptureSearchResult" %> +<%@ page import="org.archive.wayback.core.CaptureSearchResults" %> +<%@ page import="org.archive.wayback.core.UIResults" %> +<%@ page import="org.archive.wayback.core.WaybackRequest" %> +<%@ page import="org.archive.wayback.query.resultspartitioner.ResultsPartitionsFactory" %> +<%@ page import="org.archive.wayback.query.resultspartitioner.ResultsPartition" %> +<%@ page import="org.archive.wayback.util.StringFormatter" %> +<jsp:include page="/WEB-INF/template/UI-header.jsp" flush="true" /> +<jsp:include page="/WEB-INF/template/CookieJS.jsp" flush="true" /> +<% +UIResults results = UIResults.extractCaptureQuery(request); + +WaybackRequest wbRequest = results.getWbRequest(); +CaptureSearchResults cResults = results.getCaptureResults(); +StringFormatter fmt = wbRequest.getFormatter(); +String searchString = wbRequest.getRequestUrl(); +List<String> closeMatches = cResults.getCloseMatches(); + + +Date searchStartDate = wbRequest.getStartDate(); +Date searchEndDate = wbRequest.getEndDate(); +long firstResult = cResults.getFirstReturned(); +long lastResult = cResults.getReturnedCount() + firstResult; +long resultCount = cResults.getMatchingCount(); + +ArrayList<ResultsPartition> partitions = + ResultsPartitionsFactory.get(cResults, wbRequest); +int numPartitions = partitions.size(); +%> +<table border="0" cellpadding="5" width="100%" class="mainSearchBanner" cellspacing="0"> + <tr> + <td> + <%= fmt.format("PathQueryClassic.searchedFor",searchString) %> + </td> + <td align="right"> + Set Anchor Window: + <jsp:include page="/WEB-INF/template/AnchorWindow.jsp" flush="true" /> + <%= fmt.format("PathQueryClassic.resultsSummary",resultCount) %> + </td> + </tr> +</table> +<br> + + +<table border="0" width="100%"> + <tr bgcolor="#CCCCCC"> + <td colspan="<%= numPartitions %>" align="center" class="mainCalendar"> + <%= fmt.format("PathQueryClassic.searchResults",searchStartDate,searchEndDate) %> + </td> + </tr> + +<!-- RESULT 
COLUMN HEADERS --> + <tr bgcolor="#CCCCCC"> +<% + for(int i = 0; i < numPartitions; i++) { + ResultsPartition partition = partitions.get(i); +%> + <td align="center" class="mainBigBody"> + <%= partition.getTitle() %> + </td> +<% + } +%> + </tr> +<!-- /RESULT COLUMN HEADERS --> + + + +<!-- RESULT COLUMN COUNTS --> + <tr bgcolor="#CCCCCC"> +<% + for(int i = 0; i < numPartitions; i++) { + ResultsPartition partition = (ResultsPartition) partitions.get(i); +%> + <td align="center" class="mainBigBody"> + <%= fmt.format("ResultPartition.columnSummary",partition.resultsCount()) %> + </td> +<% + } +%> + </tr> +<!-- /RESULT COLUMN COUNTS --> + + +<!-- RESULT COLUMN DATA --> + <tr bgcolor="#EBEBEB"> +<% + boolean first = false; + String lastMD5 = null; + + for(int i = 0; i < numPartitions; i++) { + ResultsPartition partition = (ResultsPartition) partitions.get(i); + ArrayList<CaptureSearchResult> partitionResults = partition.getMatches(); +%> + <td nowrap class="mainBody" valign="top"> +<% + if(partitionResults.size() == 0) { +%> + +<% + } else { + + for(int j = 0; j < partitionResults.size(); j++) { + + CaptureSearchResult result = partitionResults.get(j); + String url = result.getUrlKey(); + String captureTimestamp = result.getCaptureTimestamp(); + Date captureDate = result.getCaptureDate(); + String prettyDate = fmt.format("PathQuery.classicResultLinkText", + captureDate); + String origHost = result.getOriginalHost(); + String MD5 = result.getDigest(); + String redirectFlag = (0 == result.getRedirectUrl().compareTo("-")) + ? 
"" : fmt.format("PathPrefixQuery.redirectIndicator"); + String httpResponse = result.getHttpCode(); + String mimeType = result.getMimeType(); + + String arcFile = result.getFile(); + String arcOffset = String.valueOf(result.getOffset()); + + String replayUrl = results.resultToReplayUrl(result); + + boolean updated = false; + if(lastMD5 == null) { + lastMD5 = MD5; + updated = true; + } else if(0 != lastMD5.compareTo(MD5)) { + updated = true; + lastMD5 = MD5; + } + String updateStar = updated ? "*" : ""; +%> + <a onclick="SetAnchorDate('<%= captureTimestamp %>');" href="<%= replayUrl %>"><%= prettyDate %></a> <%= updateStar %><br></br> +<% + + } + + } +%> + </td> +<% + } + +%> + </tr> +<!-- /RESULT COLUMN DATA --> +</table> + + +<% +if(closeMatches != null && !closeMatches.isEmpty()) { + WaybackRequest tmp = wbRequest.clone(); + + + %> + Close Matches:<br> + <% + for(String closeMatch : closeMatches) { + tmp.setRequestUrl(closeMatch); + String link = tmp.getContextPrefix() + "query?" + + tmp.getQueryArguments(); + %> + <a href="<%= link %>"><%= closeMatch %></a><br> + <% + } +} +// show page indicators: +if(cResults.getNumPages() > 1) { + int curPage = cResults.getCurPageNum(); + %> + <hr></hr> + <% + for(int i = 1; i <= cResults.getNumPages(); i++) { + if(i == curPage) { + %> + <b><%= i %></b> + <% + } else { + %> + <a href="<%= results.urlForPage(i) %>"><%= i %></a> + <% + } + } +} +%> +<jsp:include page="/WEB-INF/template/UI-footer.jsp" flush="true" /> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:50:39
|
Revision: 3046 http://archive-access.svn.sourceforge.net/archive-access/?rev=3046&view=rev Author: bradtofel Date: 2010-04-14 21:50:33 +0000 (Wed, 14 Apr 2010) Log Message: ----------- FEATURE: NotInArchive error page now may provide link to url prefix search in parent directory of missing resource Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2010-04-14 21:44:57 UTC (rev 3045) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2010-04-14 21:50:33 UTC (rev 3046) @@ -5,12 +5,14 @@ <%@ page import="org.archive.wayback.core.UIResults" %> <%@ page import="org.archive.wayback.core.WaybackRequest" %> <%@ page import="org.archive.wayback.util.StringFormatter" %> +<%@ page import="org.archive.wayback.util.url.UrlOperations" %> <% UIResults results = UIResults.extractException(request); WaybackException e = results.getException(); +WaybackRequest wbr = results.getWbRequest(); e.setupResponse(response); -String contextRoot = results.getWbRequest().getContextPrefix(); - +String contextRoot = wbr.getContextPrefix(); +String requestUrl = wbr.getRequestUrl(); %> <jsp:include page="/WEB-INF/template/UI-header.jsp" flush="true" /> @@ -28,9 +30,10 @@ List<String> closeMatches = niae.getCloseMatches(); if(closeMatches != null && !closeMatches.isEmpty()) { %> - Other requests to try:<br> + <p> + Other possible close matches to try:<br></br> <% - WaybackRequest tmp = results.getWbRequest().clone(); + WaybackRequest tmp = wbr.clone(); for(String closeMatch : closeMatches) { tmp.setRequestUrl(closeMatch); String link = tmp.getContextPrefix() + "query?" 
+ @@ -40,6 +43,23 @@ <% } } + String parentUrl = UrlOperations.getUrlParentDir(requestUrl); + if(parentUrl != null) { + WaybackRequest tmp = wbr.clone(); + tmp.setRequestUrl(parentUrl); + tmp.setUrlQueryRequest(); + String link = tmp.getContextPrefix() + "query?" + + tmp.getQueryArguments(); + String escapedLink = fmt.escapeHtml(link); + String escapedParentUrl = fmt.escapeHtml(parentUrl); + %> + </p> + <p> + More options:<br></br> + Try Searching all pages under <a href="<%= escapedLink %>"><%= escapedParentUrl %></a> + </p> + <% + } } %> <jsp:include page="/WEB-INF/template/UI-footer.jsp" flush="true" /> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:45:03
|
Revision: 3045 http://archive-access.svn.sourceforge.net/archive-access/?rev=3045&view=rev Author: bradtofel Date: 2010-04-14 21:44:57 +0000 (Wed, 14 Apr 2010) Log Message: ----------- DEPRECATED Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/DayResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/HourResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/MonthResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthTimelineResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/WeekResultsPartitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/YearResultsPartitioner.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/DayResultsPartitioner.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/DayResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/DayResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -33,6 +33,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class DayResultsPartitioner extends ResultsPartitioner { private static int MAX_SECONDS_SPANNED = 60 * 60 * 24 * 8; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/HourResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/HourResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/HourResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -33,6 +33,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class HourResultsPartitioner extends ResultsPartitioner { private static int MAX_SECONDS_SPANNED = 60 * 60 * 10; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/MonthResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/MonthResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/MonthResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -33,6 +33,7 @@ 
* * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class MonthResultsPartitioner extends ResultsPartitioner { private static int MAX_SECONDS_SPANNED = 60 * 60 * 24 * 30 * 11; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartition.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -34,6 +34,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class ResultsPartition { private String startDateStr = null; // inclusive Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -35,6 +35,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public abstract class ResultsPartitioner { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsPartitionsFactory.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -38,6 +38,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class ResultsPartitionsFactory { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/ResultsTimelinePartitionsFactory.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -37,6 +37,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class ResultsTimelinePartitionsFactory { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -33,6 +33,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ 
public class TwoMonthResultsPartitioner extends ResultsPartitioner { private static int MAX_SECONDS_SPANNED = 60 * 60 * 24 * 30 * 24; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthTimelineResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthTimelineResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/TwoMonthTimelineResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -28,6 +28,11 @@ import org.archive.wayback.core.WaybackRequest; +/** + * @author brad + * @deprecated use org.archive.wayback.util.parition.* + * + */ public class TwoMonthTimelineResultsPartitioner extends TwoMonthResultsPartitioner { protected String rangeToTitle(Calendar start, Calendar end, WaybackRequest wbRequest) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/WeekResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/WeekResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/WeekResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -33,6 +33,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class WeekResultsPartitioner extends ResultsPartitioner { private static int MAX_SECONDS_SPANNED = 60 * 60 * 24 * 7 * 8; Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/YearResultsPartitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/YearResultsPartitioner.java 2010-04-14 21:41:44 UTC (rev 3044) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/resultspartitioner/YearResultsPartitioner.java 2010-04-14 21:44:57 UTC (rev 3045) @@ -33,6 +33,7 @@ * * @author brad * @version $Date$, $Revision$ + * @deprecated use org.archive.wayback.util.parition.* */ public class YearResultsPartitioner extends ResultsPartitioner { private static int MAX_SECONDS_SPANNED = 60 * 60 * 24 * 365 * 50; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:41:50
|
Revision: 3044 http://archive-access.svn.sourceforge.net/archive-access/?rev=3044&view=rev Author: bradtofel Date: 2010-04-14 21:41:44 +0000 (Wed, 14 Apr 2010) Log Message: ----------- INITIAL REV: very early /liveweb/ AccessPoint, which returns and rewrites resources from the live web according to configuration of an "inner" AccessPoint Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java 2010-04-14 21:41:44 UTC (rev 3044) @@ -0,0 +1,213 @@ +/* LiveWebAccessPoint + * + * $Id$: + * + * Created on Apr 1, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.webapp; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.io.arc.ARCRecord; +import org.archive.wayback.accesscontrol.robotstxt.RobotExclusionFilterFactory; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.ResourceNotInArchiveException; +import org.archive.wayback.exception.RobotAccessControlException; +import org.archive.wayback.exception.WaybackException; +import org.archive.wayback.liveweb.LiveWebCache; +import org.archive.wayback.resourceindex.filters.ExclusionFilter; +import org.archive.wayback.resourcestore.resourcefile.ArcResource; +import org.springframework.beans.factory.BeanNameAware; + +/** + * @author brad + * + * AccessPoint subclass which allows no Queries, but makes all replay requests + * through a LiveWebCache + * + */ +public class LiveWebAccessPoint extends ServletRequestContext implements BeanNameAware { + private AccessPoint inner = null; + private LiveWebCache cache = null; + private RobotExclusionFilterFactory robotFactory = null; + private long maxCacheMS = 86400000; + private String beanName = null; + private int contextPort = 0; + private String contextName = null; + + public void setBeanName(String beanName) { + this.beanName = beanName; + this.contextName = ""; + int idx = beanName.indexOf(":"); + if(idx > -1) { + contextPort = Integer.valueOf(beanName.substring(0,idx)); + contextName = 
beanName.substring(idx + 1); + } else { + try { + this.contextPort = Integer.valueOf(beanName); + } catch(NumberFormatException e) { + e.printStackTrace(); + } + } + } + /** + * @param httpRequest HttpServletRequest which is being handled + * @return the prefix of paths received by this server that are handled by + * this WaybackContext, including the trailing '/' + */ + public String getContextPath(HttpServletRequest httpRequest) { + String httpContextPath = httpRequest.getContextPath(); + if(contextName.length() == 0) { + return httpContextPath + "/"; + } + return httpContextPath + "/" + contextName + "/"; + } + + + protected String translateRequest(HttpServletRequest httpRequest, + boolean includeQuery) { + + String origRequestPath = httpRequest.getRequestURI(); + if(includeQuery) { + String queryString = httpRequest.getQueryString(); + if (queryString != null) { + origRequestPath += "?" + queryString; + } + } + String contextPath = getContextPath(httpRequest); + if (!origRequestPath.startsWith(contextPath)) { + if(contextPath.startsWith(origRequestPath)) { + // missing trailing '/', just omit: + return ""; + } + return null; + } + return origRequestPath.substring(contextPath.length()); + } + + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) + throws ServletException, IOException { + + String urlString = translateRequest(httpRequest,true); + boolean handled = true; + WaybackRequest wbRequest = new WaybackRequest(); + wbRequest.setAccessPoint(inner); + wbRequest.setContextPrefix(inner.getAbsoluteServerPrefix(httpRequest)); + wbRequest.setServerPrefix(inner.getAbsoluteServerPrefix(httpRequest)); + wbRequest.setLiveWebRequest(true); + wbRequest.setRequestUrl(urlString); + URL url = null; + try { + try { + url = new URL(urlString); + } catch(MalformedURLException e) { + throw new BadQueryException("Bad URL(" + urlString + ")"); + } + + CaptureSearchResult result = new CaptureSearchResult(); + 
result.setOriginalUrl(urlString); + result.setUrlKey(urlString); + // should we check robots, first? + if(robotFactory != null) { + int ruling = robotFactory.get().filterObject(result); + if(ruling == ExclusionFilter.FILTER_EXCLUDE) { + throw new RobotAccessControlException(urlString + "is blocked by robots.txt"); + } + } + // robots says GO: + ArcResource r = (ArcResource) cache.getCachedResource(url, maxCacheMS , false); + ARCRecord ar = (ARCRecord) r.getArcRecord(); + int status = ar.getStatusCode(); + if((status == 200) || ((status >= 300) && (status < 400))) { + result.setCaptureTimestamp(ar.getMetaData().getDate()); + result.setMimeType(ar.getMetaData().getMimetype()); + CaptureSearchResults results = new CaptureSearchResults(); + results.addSearchResult(result); + + wbRequest.setReplayTimestamp(result.getCaptureTimestamp()); + + inner.getReplay().getRenderer(wbRequest, result, r).renderResource( + httpRequest, httpResponse, wbRequest, result, r, + inner.getUriConverter(), results); + } else { + throw new ResourceNotInArchiveException("Not In Archive - Not on Live web"); + } + + } catch(WaybackException e) { + inner.getException().renderException(httpRequest, httpResponse, wbRequest, + e, inner.getUriConverter()); + } + return handled; + } + + /** + * @return the cache + */ + public LiveWebCache getCache() { + return cache; + } + + /** + * @param cache the cache to set + */ + public void setCache(LiveWebCache cache) { + this.cache = cache; + } + + /** + * @return the robotFactory + */ + public RobotExclusionFilterFactory getRobotFactory() { + return robotFactory; + } + + /** + * @param robotFactory the robotFactory to set + */ + public void setRobotFactory(RobotExclusionFilterFactory robotFactory) { + this.robotFactory = robotFactory; + } + + /** + * @return the inner + */ + public AccessPoint getInner() { + return inner; + } + + /** + * @param inner the inner to set + */ + public void setInner(AccessPoint inner) { + this.inner = inner; + } +} Property 
changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/LiveWebAccessPoint.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:37:38
|
Revision: 3043 http://archive-access.svn.sourceforge.net/archive-access/?rev=3043&view=rev Author: bradtofel Date: 2010-04-14 21:37:21 +0000 (Wed, 14 Apr 2010) Log Message: ----------- FEATURE: Now calls setClosest() on closest result in Replay mode Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-14 21:33:02 UTC (rev 3042) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-04-14 21:37:21 UTC (rev 3043) @@ -453,6 +453,7 @@ // TODO: check which versions are actually accessible right now? CaptureSearchResult closest = captureResults.getClosest(wbRequest, useAnchorWindow); + closest.setClosest(true); resource = collection.getResourceStore().retrieveResource(closest); p.retrieved(); ReplayRenderer renderer = replay.getRenderer(wbRequest, closest, resource); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:33:25
|
Revision: 3042 http://archive-access.svn.sourceforge.net/archive-access/?rev=3042&view=rev Author: bradtofel Date: 2010-04-14 21:33:02 +0000 (Wed, 14 Apr 2010) Log Message: ----------- FEATURE: added getUrlParentDir() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-04-14 21:28:35 UTC (rev 3041) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2010-04-14 21:33:02 UTC (rev 3042) @@ -28,6 +28,7 @@ import java.util.regex.Pattern; import org.apache.commons.httpclient.URIException; +import org.apache.log4j.Logger; import org.archive.net.UURI; import org.archive.net.UURIFactory; @@ -39,6 +40,8 @@ * @version $Date$, $Revision$ */ public class UrlOperations { + private static final Logger LOGGER = Logger.getLogger( + UrlOperations.class.getName()); public final static String DNS_SCHEME = "dns:"; public final static String HTTP_SCHEME = "http://"; @@ -116,7 +119,7 @@ try { return UURIFactory.getInstance(url).getEscapedURI(); } catch (URIException e) { - e.printStackTrace(); + LOGGER.warn(e.getLocalizedMessage() + ": " + url); // can't let a space exist... send back close to whatever came // in... 
return url.replace(" ", "%20"); @@ -129,7 +132,7 @@ absBaseURI = UURIFactory.getInstance(baseUrl); resolvedURI = UURIFactory.getInstance(absBaseURI, url); } catch (URIException e) { - e.printStackTrace(); + LOGGER.warn(e.getLocalizedMessage() + ": " + url); return url.replace(" ", "%20"); } return resolvedURI.getEscapedURI(); @@ -198,4 +201,33 @@ } return url; } + + /** + * Find and return the parent directory of the URL argument + * @param url to find the parent directory of + * @return parent directory of URL, or null, if either the url argument is + * invalid, or if the url is the root of the authority. + */ + public static String getUrlParentDir(String url) { + + try { + UURI uri = UURIFactory.getInstance(url); + String path = uri.getPath(); + if(path.length() > 1) { + int startIdx = path.length()-1; + if(path.charAt(path.length()-1) == '/') { + startIdx--; + } + int idx = path.lastIndexOf('/',startIdx); + if(idx >= 0) { + uri.setPath(path.substring(0,idx+1)); + uri.setQuery(null); + return uri.toString(); + } + } + } catch (URIException e) { + LOGGER.warn(e.getLocalizedMessage() + ": " + url); + } + return null; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-04-14 21:28:35 UTC (rev 3041) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2010-04-14 21:33:02 UTC (rev 3042) @@ -9,6 +9,7 @@ * @version $Date$, $Revision$ */ public class UrlOperationsTest extends TestCase { + public void testIsAuthority() { checkAuthority("foo.com",true); checkAuthority("foo.con",false); @@ -93,5 +94,40 @@ assertEquals("rtsp://",UrlOperations.urlToScheme("rtsp://a.com/")); assertEquals("mms://",UrlOperations.urlToScheme("mms://a.com/")); 
assertNull(UrlOperations.urlToScheme("blah://a.com/")); - } + } + + public void testGetUrlParentDir() { + + assertEquals( "http://a.b/c/", + UrlOperations.getUrlParentDir("http://a.b/c/d")); + + assertEquals( "http://a.b/", + UrlOperations.getUrlParentDir("http://a.b/c/")); + + assertEquals( "http://a.b/", + UrlOperations.getUrlParentDir("http://a.b/c")); + + assertEquals( "http://a.b/c/d/e/", + UrlOperations.getUrlParentDir("http://a.b/c/d/e/f")); + + assertEquals( "http://a.b/", + UrlOperations.getUrlParentDir("http://a.b/c?d=e")); + + assertEquals( null, + UrlOperations.getUrlParentDir("http://a.b/")); + + assertEquals( null, + UrlOperations.getUrlParentDir("http//a.b/")); + + assertEquals( null, + UrlOperations.getUrlParentDir("http://")); + + assertEquals( null, + UrlOperations.getUrlParentDir("http://#4.8gifdijdf")); + + assertEquals( null, + UrlOperations.getUrlParentDir("http://#4.8gifdijdf/a/b")); + + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:28:42
|
Revision: 3041 http://archive-access.svn.sourceforge.net/archive-access/?rev=3041&view=rev Author: bradtofel Date: 2010-04-14 21:28:35 +0000 (Wed, 14 Apr 2010) Log Message: ----------- INITIAL REV Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/StringFormatterTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/StringFormatterTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/StringFormatterTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/StringFormatterTest.java 2010-04-14 21:28:35 UTC (rev 3041) @@ -0,0 +1,95 @@ +/* StringFormatterTest + * + * $Id$: + * + * Created on Apr 13, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util; + +import java.util.Date; +import java.util.ListResourceBundle; +import java.util.Locale; +import java.util.ResourceBundle; + +import junit.framework.TestCase; + +/** + * @author brad + * + */ +public class StringFormatterTest extends TestCase { + + /** + * Tests the various format methods, albeit not very well.. + */ + public void testFormatStringObjectArray() { + ResourceBundle rb = new ListResourceBundle() { + + @Override + protected Object[][] getContents() { + Object[][] m = { + {"a","A"}, + {"b","Bee"}, + {"f0","()"}, + {"f1","{0}"}, + {"f2","{1} {0}"}, + {"f3","{2} {1} {0}"}, + {"d1","{0,date,YYYY}"}, // that should be 'yyyy' + {"d2","{0,date,yyyy}"}, + }; + return m; + } + }; + Locale l = Locale.getDefault(); + StringFormatter fmt = new StringFormatter(rb, l); + assertEquals("A",fmt.format("a")); + assertEquals("Bee",fmt.format("b")); + assertEquals("()",fmt.format("f0")); + assertEquals("{0}",fmt.format("f1")); + assertEquals("1",fmt.format("f1","1")); + assertEquals("2 1",fmt.format("f2","1","2")); + assertEquals("3 2 1",fmt.format("f3","1","2",3)); + assertEquals("d1",fmt.format("d1",new Date(0L))); + assertEquals("1970",fmt.format("d2",new Date(0L))); + } + + /** + * Test method for {@link org.archive.wayback.util.StringFormatter#escapeHtml(java.lang.String)}. + */ + public void testEscapeHtml() { + StringFormatter fmt = new StringFormatter(null, null); + assertEquals("normal",fmt.escapeHtml("normal")); + assertEquals("normal&amp;",fmt.escapeHtml("normal&")); + assertEquals("normal&quot;&amp;",fmt.escapeHtml("normal\"&")); + } + + /** + * Test method for {@link org.archive.wayback.util.StringFormatter#escapeJavaScript(java.lang.String)}. 
+ */ + public void testEscapeJavaScript() { + StringFormatter fmt = new StringFormatter(null, null); + assertEquals("normal",fmt.escapeHtml("normal")); + assertEquals("normal&amp;",fmt.escapeHtml("normal&")); + assertEquals("normal&quot;&amp;",fmt.escapeHtml("normal\"&")); + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/StringFormatterTest.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:25:46
|
Revision: 3040 http://archive-access.svn.sourceforge.net/archive-access/?rev=3040&view=rev Author: bradtofel Date: 2010-04-14 21:25:37 +0000 (Wed, 14 Apr 2010) Log Message: ----------- BUGFIX(unreported): now sets milliseconds to 0, making comparison of Date objects on the same second match. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/Timestamp.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/Timestamp.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/Timestamp.java 2010-04-14 21:23:31 UTC (rev 3039) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/Timestamp.java 2010-04-14 21:25:37 UTC (rev 3040) @@ -289,6 +289,7 @@ cal.set(Calendar.HOUR_OF_DAY,iHour); cal.set(Calendar.MINUTE,iMinute); cal.set(Calendar.SECOND,iSecond); + cal.set(Calendar.MILLISECOND,0); return cal; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:23:42
|
Revision: 3039 http://archive-access.svn.sourceforge.net/archive-access/?rev=3039&view=rev Author: bradtofel Date: 2010-04-14 21:23:31 +0000 (Wed, 14 Apr 2010) Log Message: ----------- BUGFIX/FEATURE: now explicitly sets UTC on internal DateFormat objects generated by MessageFormat FEATURE: this class now handles HTML escaping COMMENTS: Javadoc is now more-or-less complete Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java 2010-04-14 21:20:49 UTC (rev 3038) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/StringFormatter.java 2010-04-14 21:23:31 UTC (rev 3039) @@ -24,26 +24,40 @@ */ package org.archive.wayback.util; +import java.text.DateFormat; +import java.text.Format; import java.text.MessageFormat; import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.ResourceBundle; +import java.util.TimeZone; +import org.apache.commons.lang.StringEscapeUtils; + /** + * An class which assists in UI generation, primarily through Locale-aware + * String formatting, and also helps in escaping (hopefully properly) Strings + * for use in HTML. + * + * Note that date formatting done through this class forces all times to the + * UTC timezone - at the moment it appears too confusing to attempt to localize + * times in any other way.. 
* - * * @author brad * @version $Date$, $Revision$ */ public class StringFormatter { - + private final static TimeZone TZ_UTC = TimeZone.getTimeZone("UTC"); + ResourceBundle bundle = null; Locale locale = null; Map<String,MessageFormat> formats = null; /** - * @param bundle - * @param locale + * Construct a StringFormatter... + * @param bundle ResourceBundle to lookup patterns for MessageFormat + * objects. + * @param locale to use, where applicable with MessageFormat objects */ public StringFormatter(ResourceBundle bundle, Locale locale) { this.bundle = bundle; @@ -55,13 +69,26 @@ MessageFormat format = formats.get(pattern); if(format == null) { format = new MessageFormat(pattern,locale); + // lets try to make sure any internal DateFormats use UTC: + Format[] subFormats = format.getFormats(); + if(subFormats != null) { + for(Format subFormat : subFormats) { + if(subFormat instanceof DateFormat) { + DateFormat subDateFormat = (DateFormat) subFormat; + subDateFormat.setTimeZone(TZ_UTC); + } + } + } + formats.put(pattern,format); } return format; } /** - * @param key + * Access a localized string associated with key from the ResourceBundle, + * likely the UI.properties file. + * @param key to lookup in the ResourceBundle * @return localized String version of key argument, or key itself if * something goes wrong... */ @@ -73,68 +100,108 @@ } } - /** - * @param key - * @param args - * @return Localized String for key, interpolated with args - */ - public String format(String key, Object args[]) { + private String formatInner(String key, Object objects[]) { try { - return getFormat(getLocalized(key)).format(args); + return getFormat(getLocalized(key)).format(objects); } catch (Exception e) { e.printStackTrace(); } return key; } + + // What gives? This works in the Junit test, but not in jsps... 
+// /** +// * @param key String property name in UI.properties file to use as the +// * pattern for interpolation +// * @param objects array of things to interpolate within the MessageFormat +// * described by the pattern in UI.properties for key key +// * @return Localized Formatted String for key, interpolated with args +// */ +// public String format(String key, Object...objects) { +// return formatInner(key,objects); +// } /** - * @param key + * Localize a string key from the UI.properties file + * @param key String property name in UI.properties file to use as the + * pattern for the MessageFormat * @return Localized String for key */ public String format(String key) { Object args[] = {}; - return format(key,args); + return formatInner(key,args); } /** - * @param key - * @param o1 - * @return Localized String for key, interpolated with o1 + * @param key String property name in UI.properties file to use as the + * pattern for interpolation + * @param o1 thing1 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @return Localized Formatted String for key, interpolated with argument objects */ public String format(String key,Object o1) { Object args[] = {o1}; - return format(key,args); + return formatInner(key,args); } /** - * @param key - * @param o1 - * @param o2 - * @return Localized String for key, interpolated with o1,o2 + * @param key String property name in UI.properties file to use as the + * pattern for interpolation + * @param o1 thing1 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @param o2 thing2 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @return Localized Formatted String for key, interpolated with argument objects */ public String format(String key,Object o1,Object o2) { Object args[] = {o1,o2}; - return format(key,args); + return formatInner(key,args); } /** - * @param key - * @param o1 
- * @param o2 - * @param o3 - * @return Localized String for key, interpolated with o1,o2,o3 + * @param key String property name in UI.properties file to use as the + * pattern for interpolation + * @param o1 thing1 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @param o2 thing2 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @param o3 thing3 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @return Localized Formatted String for key, interpolated with argument objects */ public String format(String key,Object o1,Object o2,Object o3) { Object args[] = {o1,o2,o3}; - return format(key,args); + return formatInner(key,args); } /** - * @param key - * @param o1 - * @param o2 - * @param o3 - * @param o4 - * @return Localized String for key, interpolated with o1,o2,o3,o4 + * @param key String property name in UI.properties file to use as the + * pattern for interpolation + * @param o1 thing1 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @param o2 thing2 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @param o3 thing3 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @param o4 thing4 to interpolate within the MessageFormat + * described by the pattern in UI.properties for key key + * @return Localized Formatted String for key, interpolated with argument objects */ public String format(String key,Object o1,Object o2,Object o3,Object o4) { Object args[] = {o1,o2,o3,o4}; - return format(key,args); + return formatInner(key,args); } + + /** + * handy shortcut to the apache StringEscapeUtils + * @param raw string to be escaped + * @return the string escaped so it's safe for insertion in HTML + */ + public String escapeHtml(String raw) { + return 
StringEscapeUtils.escapeHtml(raw); + } + /** + * handy shortcut to the apache StringEscapeUtils + * @param raw string to be escaped + * @return the string escaped so it's safe for insertion in Javascript + */ + public String escapeJavaScript(String raw) { + return StringEscapeUtils.escapeJavaScript(raw); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:21:01
|
Revision: 3038 http://archive-access.svn.sourceforge.net/archive-access/?rev=3038&view=rev Author: bradtofel Date: 2010-04-14 21:20:49 +0000 (Wed, 14 Apr 2010) Log Message: ----------- REFACTOR: removed buggy local-cache, added less buggy cache which relies on varnish/squid to do RAM/Filesystem caching, new implementation uses Heritrix HTTP Recorder code to write raw bytes from the web into ARCs, rather than whatever the apache HTTP client returned. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java 2010-04-14 21:15:01 UTC (rev 3037) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -1,157 +0,0 @@ -/* ARCCachingProxy - * - * 
$Id$: - * - * Created on Dec 8, 2009. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -package org.archive.wayback.liveweb; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStream; -import java.io.PrintWriter; -import java.io.RandomAccessFile; -import java.net.URL; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.log4j.Logger; -import org.archive.io.arc.ARCLocation; -import org.archive.io.arc.ARCRecord; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.core.Resource; -import org.archive.wayback.exception.LiveDocumentNotAvailableException; -import org.archive.wayback.resourcestore.resourcefile.ArcResource; -import org.archive.wayback.webapp.ServletRequestContext; - -/** - * @author brad - * - */ -public class ARCCachingProxy extends ServletRequestContext { - - private final static String EXPIRES_HEADER = "Expires"; - - private final static String ARC_RECORD_CONTENT_TYPE = "application/x-arc-record"; - private static final Logger LOGGER = Logger.getLogger( - ARCCachingProxy.class.getName()); - private ARCCacheDirectory arcCacheDir = null; - private URLCacher cacher 
= null; - private long expiresMS = 60 * 60 * 1000; - /* (non-Javadoc) - * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) - */ - @Override - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws ServletException, - IOException { - - StringBuffer sb = httpRequest.getRequestURL(); - String query = httpRequest.getQueryString(); - if(query != null) { - sb.append("?").append(query); - } - URL url = new URL(sb.toString()); - FileRegion r = null; - try { - r = getLiveResource(url); - httpResponse.setStatus(httpResponse.SC_OK); - httpResponse.setContentLength((int)r.getLength()); - httpResponse.setContentType(ARC_RECORD_CONTENT_TYPE); - httpResponse.setDateHeader("Expires", System.currentTimeMillis() + expiresMS); - r.copyToOutputStream(httpResponse.getOutputStream()); - - } catch (LiveDocumentNotAvailableException e) { - - e.printStackTrace(); - httpResponse.sendError(httpResponse.SC_NOT_FOUND); - } -// httpResponse.setContentType("text/plain"); -// PrintWriter pw = httpResponse.getWriter(); -// pw.println("PathInfo:" + httpRequest.getPathInfo()); -// pw.println("RequestURI:" + httpRequest.getRequestURI()); -// pw.println("RequestURL:" + httpRequest.getRequestURL()); -// pw.println("QueryString:" + httpRequest.getQueryString()); -// pw.println("PathTranslated:" + httpRequest.getPathTranslated()); -// pw.println("ServletPath:" + httpRequest.getServletPath()); -// pw.println("ContextPath:" + httpRequest.getContextPath()); -// if(r != null) { -// pw.println("CachePath:" + r.file.getAbsolutePath()); -// pw.println("CacheStart:" + r.start); -// pw.println("CacheEnd:" + r.end); -// } else { -// pw.println("FAILED CACHE!"); -// } - - return true; - } - - - private FileRegion getLiveResource(URL url) - throws LiveDocumentNotAvailableException, IOException { - - Resource resource = null; - - LOGGER.info("Caching URL(" + url.toString() 
+ ")"); - FileRegion region = cacher.cache2(arcCacheDir, url.toString()); - if(region != null) { - LOGGER.info("Cached URL(" + url.toString() + ") in " + - "ARC(" + region.file.getAbsolutePath() + ") at (" - + region.start + " - " + region.end + ")"); - - } else { - throw new IOException("No location!"); - } - - return region; -} - - /** - * @return the arcCacheDir - */ - public ARCCacheDirectory getArcCacheDir() { - return arcCacheDir; - } - - /** - * @param arcCacheDir the arcCacheDir to set - */ - public void setArcCacheDir(ARCCacheDirectory arcCacheDir) { - this.arcCacheDir = arcCacheDir; - } - - /** - * @return the cacher - */ - public URLCacher getCacher() { - return cacher; - } - - /** - * @param cacher the cacher to set - */ - public void setCacher(URLCacher cacher) { - this.cacher = cacher; - } -} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -0,0 +1,148 @@ +/* ARCRecordingProxy + * + * $Id$: + * + * Created on Apr 1, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.liveweb; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.httpclient.URIException; +import org.apache.log4j.Logger; +import org.archive.wayback.webapp.ServletRequestContext; + +/** + * @author brad + * + */ +public class ARCRecordingProxy extends ServletRequestContext { + + private final static String EXPIRES_HEADER = "Expires"; + private long expiresMS = 60 * 60 * 1000; + private long fakeExpiresMS = 5 * 60 * 1000; + private final static String ARC_RECORD_CONTENT_TYPE = + "application/x-arc-record"; + + private static final Logger LOGGER = + Logger.getLogger(ARCRecordingProxy.class.getName()); + + private ARCCacheDirectory arcCacheDir = null; + private URLtoARCCacher cacher = null; + /* (non-Javadoc) + * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) + */ + @Override + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws ServletException, + IOException { + + StringBuffer sb = httpRequest.getRequestURL(); + String query = httpRequest.getQueryString(); + if(query != null) { + sb.append("?").append(query); + } + FileRegion r = null; + try { + + String url = sb.toString(); + LOGGER.info("Caching URL(" + url + ")"); + r = cacher.cacheURL(url, arcCacheDir); + + httpResponse.setStatus(HttpServletResponse.SC_OK); + httpResponse.setContentLength((int)r.getLength()); + httpResponse.setContentType(ARC_RECORD_CONTENT_TYPE); + long exp = System.currentTimeMillis(); + exp += (r.isFake ? 
fakeExpiresMS : expiresMS); + + httpResponse.setDateHeader(EXPIRES_HEADER, exp); + + r.copyToOutputStream(httpResponse.getOutputStream()); + + } catch (URIException e) { + + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND); + } + return true; + } + + /** + * @return the arcCacheDir + */ + public ARCCacheDirectory getArcCacheDir() { + return arcCacheDir; + } + + /** + * @param arcCacheDir the arcCacheDir to set + */ + public void setArcCacheDir(ARCCacheDirectory arcCacheDir) { + this.arcCacheDir = arcCacheDir; + } + + /** + * @return the cacher + */ + public URLtoARCCacher getCacher() { + return cacher; + } + + /** + * @param cacher the cacher to set + */ + public void setCacher(URLtoARCCacher cacher) { + this.cacher = cacher; + } + + /** + * @return the expiresMS + */ + public long getExpiresMS() { + return expiresMS; + } + + /** + * @param expiresMS the expiresMS to set + */ + public void setExpiresMS(long expiresMS) { + this.expiresMS = expiresMS; + } + + /** + * @return the fakeExpiresMS + */ + public long getFakeExpiresMS() { + return fakeExpiresMS; + } + + /** + * @param fakeExpiresMS the fakeExpiresMS to set + */ + public void setFakeExpiresMS(long fakeExpiresMS) { + this.fakeExpiresMS = fakeExpiresMS; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCRecordingProxy.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java 2010-04-14 21:20:49 UTC (rev 3038) @@ 
-0,0 +1,128 @@ +/* ARCUnwrappingProxy + * + * $Id$: + * + * Created on Dec 10, 2009. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.liveweb; + +import java.io.IOException; +import java.util.zip.GZIPInputStream; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.HostConfiguration; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; +import org.apache.commons.httpclient.methods.GetMethod; +import org.archive.io.arc.ARCRecord; +import org.archive.wayback.util.ByteOp; +import org.archive.wayback.webapp.ServletRequestContext; + +/** + * + * ServletRequestContext which proxies to an ARCRecordingProxy, and unwraps + * the "application/x-arc-record" MIME response into the inner HTTP response, + * sending all HTTP headers AS-IS, and the HTTP Entity. + * + * Can be used to use an ARCRecordingProxy with a UserAgent expecting real + * HTTP responses, not "application/x-arc-record". A web browser for example. 
+ * + * @author brad + * + */ +public class ARCUnwrappingProxy extends ServletRequestContext { + + private MultiThreadedHttpConnectionManager connectionManager = null; + private HostConfiguration hostConfiguration = null; + /** + * + */ + public ARCUnwrappingProxy() { + connectionManager = new MultiThreadedHttpConnectionManager(); + hostConfiguration = new HostConfiguration(); + } + +// protected HttpClient http = new HttpClient( +// new MultiThreadedHttpConnectionManager()); + + /* (non-Javadoc) + * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) + */ + @Override + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws ServletException, + IOException { + StringBuffer sb = httpRequest.getRequestURL(); + String query = httpRequest.getQueryString(); + if(query != null) { + sb.append("?").append(query); + } +// URL url = new URL(sb.toString()); + HttpMethod method = new GetMethod(sb.toString()); +// method.addRequestHeader("User-Agent", userAgent); + boolean got200 = false; + try { + HttpClient http = new HttpClient(connectionManager); + http.setHostConfiguration(hostConfiguration); + + int status = http.executeMethod(method); + if(status == 200) { + ARCRecord r = + new ARCRecord(new GZIPInputStream( + method.getResponseBodyAsStream()), + "id",0L,false,false,true); + r.skipHttpHeader(); + httpResponse.setStatus(r.getStatusCode()); + Header headers[] = r.getHttpHeaders(); + for(Header header : headers) { + httpResponse.addHeader(header.getName(), header.getValue()); + } + + ByteOp.copyStream(r, httpResponse.getOutputStream()); + got200 = true; + } + } finally { + method.releaseConnection(); + + } + + return got200; + } + + /** + * @param hostPort location of ARCRecordingProxy ServletRequestContext, ex: + * "localhost:3128" + */ + public void setProxyHostPort(String hostPort) { + int colonIdx = hostPort.indexOf(':'); + 
if(colonIdx > 0) { + String host = hostPort.substring(0,colonIdx); + int port = Integer.valueOf(hostPort.substring(colonIdx+1)); + hostConfiguration.setProxy(host, port); + } + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCUnwrappingProxy.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java 2010-04-14 21:15:01 UTC (rev 3037) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -38,9 +38,20 @@ File file = null; long start = -1; long end = -1; + boolean isFake = false; + /** + * @return the number of bytes in this record, including headers. If the + * containing file is compressed, then this represents the number of + * compressed bytes. + */ public long getLength() { return end - start; } + /** + * Copy this record to the provided OutputStream + * @param o the OutputStream where the bytes should be sent. 
+ * @throws IOException for usual reasons + */ public void copyToOutputStream(OutputStream o) throws IOException { long left = end - start; int BUFF_SIZE = 4096; @@ -58,5 +69,4 @@ } raf.close(); } - } Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2010-04-14 21:15:01 UTC (rev 3037) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebLocalResourceIndex.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -1,55 +0,0 @@ -/* LiveWebLocalResourceIndex - * - * $Id$ - * - * Created on 5:53:29 PM Mar 13, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.liveweb; - -import java.io.IOException; -import java.util.ArrayList; - -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.resourceindex.LocalResourceIndex; - -/** - * Alternate LocalResourceIndex that supports an alternate BDB configuration, - * and allows adding of SearchResults to the index. 
- * - * @author brad - * @version $Date$, $Revision$ - */ -public class LiveWebLocalResourceIndex extends LocalResourceIndex { - - /** - * Add a single SearchResult to the index. - * @param result - * @throws IOException - * @throws UnsupportedOperationException - */ - public void addSearchResult(CaptureSearchResult result) - throws UnsupportedOperationException, IOException { - - ArrayList<CaptureSearchResult> l = new ArrayList<CaptureSearchResult>(); - l.add(result); - addSearchResults(l.iterator()); - } -} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -0,0 +1,136 @@ +/* RemoteLiveWebCache + * + * $Id$: + * + * Created on Dec 15, 2009. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.liveweb; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.ConnectException; +import java.net.URL; +import java.util.zip.GZIPInputStream; + +import org.apache.commons.httpclient.HostConfiguration; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; +import org.apache.commons.httpclient.methods.GetMethod; +import org.archive.io.arc.ARCRecord; +import org.archive.wayback.core.Resource; +import org.archive.wayback.exception.LiveDocumentNotAvailableException; +import org.archive.wayback.exception.LiveWebCacheUnavailableException; +import org.archive.wayback.exception.ResourceNotAvailableException; +import org.archive.wayback.resourcestore.resourcefile.ArcResource; +import org.archive.wayback.resourcestore.resourcefile.ResourceFactory; + +/** + * @author brad + * + */ +public class RemoteLiveWebCache implements LiveWebCache { + + private MultiThreadedHttpConnectionManager connectionManager = null; + private HostConfiguration hostConfiguration = null; + private HttpClient http = null; + /** + * + */ + public RemoteLiveWebCache() { + connectionManager = new MultiThreadedHttpConnectionManager(); + hostConfiguration = new HostConfiguration(); + http = new HttpClient(connectionManager); + http.setHostConfiguration(hostConfiguration); + } + + /* (non-Javadoc) + * @see org.archive.wayback.liveweb.LiveWebCache#getCachedResource(java.net.URL, long, boolean) + */ + public Resource getCachedResource(URL url, long maxCacheMS, + boolean bUseOlder) throws LiveDocumentNotAvailableException, + LiveWebCacheUnavailableException, IOException { + String urlString = url.toExternalForm(); + 
HttpMethod method = new GetMethod(urlString); + try { + int status = http.executeMethod(method); + if(status == 200) { + ByteArrayInputStream bais = new ByteArrayInputStream(method.getResponseBody()); + ARCRecord r = new ARCRecord( + new GZIPInputStream(bais), + "id",0L,false,false,true); + ArcResource ar = (ArcResource) + ResourceFactory.ARCArchiveRecordToResource(r, null); + if(ar.getStatusCode() == 502) { + throw new LiveDocumentNotAvailableException(urlString); + } + return ar; + + } else { + throw new LiveWebCacheUnavailableException(urlString); + } + } catch (ResourceNotAvailableException e) { + throw new LiveDocumentNotAvailableException(urlString); + } catch (ConnectException e) { + throw new LiveWebCacheUnavailableException(e.getLocalizedMessage() + + " : " + urlString); + } finally { + method.releaseConnection(); + } + } + + /* (non-Javadoc) + * @see org.archive.wayback.liveweb.LiveWebCache#shutdown() + */ + public void shutdown() { + // TODO Auto-generated method stub + } + + + /** + * @param hostPort to proxy requests through - ex. "localhost:3128" + */ + public void setProxyHostPort(String hostPort) { + int colonIdx = hostPort.indexOf(':'); + if(colonIdx > 0) { + String host = hostPort.substring(0,colonIdx); + int port = Integer.valueOf(hostPort.substring(colonIdx+1)); + +// http.getHostConfiguration().setProxy(host, port); + hostConfiguration.setProxy(host, port); + } + } + /** + * @param maxTotalConnections the HttpConnectionManagerParams config + */ + public void setMaxTotalConnections(int maxTotalConnections) { + connectionManager.getParams(). + setMaxTotalConnections(maxTotalConnections); + } + /** + * @param maxHostConnections the HttpConnectionManagerParams config + */ + public void setMaxHostConnections(int maxHostConnections) { + connectionManager.getParams(). 
+ setMaxConnectionsPerHost(hostConfiguration, maxHostConnections); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/RemoteLiveWebCache.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2010-04-14 21:15:01 UTC (rev 3037) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -1,525 +0,0 @@ -/* URLCacher - * - * $Id$ - * - * Created on 5:30:31 PM Mar 12, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-svn. - * - * wayback-svn is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-svn is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.liveweb; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.ConnectException; -import java.net.MalformedURLException; -import java.net.NoRouteToHostException; -import java.net.URL; -import java.net.UnknownHostException; -import java.util.Arrays; -import java.util.Date; -import java.util.Properties; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.commons.httpclient.ConnectTimeoutException; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpMethodBase; -import org.apache.commons.httpclient.HttpState; -import org.apache.commons.httpclient.SimpleHttpConnectionManager; -import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; -import org.archive.io.arc.ARCLocation; -import org.archive.io.arc.ARCWriter; -import org.archive.net.LaxURI; -import org.archive.wayback.exception.LiveDocumentNotAvailableException; - -/** - * Class for performing an HTTP GET request, and storing all related info - * required to create a valid ARC Record. This info is also actually stored in - * an ARC file via an ARCWriter. This should leverage more Heritrix fetcher code - * but because the Heritrix settings system is tightly coupled with the fetcher - * code, we'll try to limp by with this class until it gets untangled. 
- * - * @author brad - * @version $Date$, $Revision$ - */ -public class URLCacher { - private static final Logger LOGGER = Logger.getLogger( - URLCacher.class.getName()); - - private static final String CACHE_PATH = "liveweb.tmp.dir"; - - protected File tmpDir = null; - @SuppressWarnings("unchecked") - private final ThreadLocal tl = new ThreadLocal() { - protected synchronized Object initialValue() { - HttpClient http = new HttpClient(); - IPHttpConnectionManager manager = new IPHttpConnectionManager(); - manager.getParams().setConnectionTimeout(10000); - manager.getParams().setSoTimeout(10000); - http.setHttpConnectionManager(manager); - return http; - } - }; - private HttpClient getHttpClient() { - return (HttpClient) tl.get(); - } - - private File getTmpFile() { - String tmpName; - File tmpFile; - try { - tmpFile = File.createTempFile("robot-tmp-",null); - tmpName = tmpFile.getName(); - tmpFile.delete(); - } catch (IOException e) { - tmpName = "oops" + Thread.currentThread().getName(); - e.printStackTrace(); - } - tmpFile = new File(tmpDir,tmpName); - if (tmpFile.exists()) { - tmpFile.delete(); - } - return tmpFile; - } - - protected ExtendedGetMethod urlToFile(String urlString, File file) - throws LiveDocumentNotAvailableException, URIException, IOException { - - HttpClient http = getHttpClient(); - OutputStream os = new FileOutputStream(file); - ExtendedGetMethod method = new ExtendedGetMethod(os); - LaxURI lURI = new LaxURI(urlString,true); - method.setURI(lURI); - try { - int code = http.executeMethod(method); - os.close(); - // TODO: Constant 200 - if(code != 200) { - throw new LiveDocumentNotAvailableException(urlString); - } - } catch (HttpException e) { - e.printStackTrace(); - throw new LiveDocumentNotAvailableException(urlString); - } catch(UnknownHostException e) { - LOGGER.info("Unknown host for URL " + urlString); - throw new LiveDocumentNotAvailableException(urlString); - } catch(ConnectTimeoutException e) { - LOGGER.info("Connection Timeout for 
URL " + urlString); - throw new LiveDocumentNotAvailableException(urlString); - } catch(NoRouteToHostException e) { - LOGGER.info("No route to host for URL " + urlString); - throw new LiveDocumentNotAvailableException(urlString); - } catch(ConnectException e) { - LOGGER.info("ConnectException URL " + urlString); - throw new LiveDocumentNotAvailableException(urlString); - } - LOGGER.info("Stored " + urlString + " in " + file.getAbsolutePath()); - return method; - } - - private ARCLocation storeFile(File file, ARCWriter writer, String url, - ExtendedGetMethod method) throws IOException { - - FileInputStream fis = new FileInputStream(file); - int len = (int) file.length(); - String mime = method.getMime(); - String ip = method.getRemoteIP(); - Date captureDate = method.getCaptureDate(); - - writer.checkSize(); - final long arcOffset = writer.getPosition(); - final String arcPath = writer.getFile().getAbsolutePath(); - - writer.write(url,mime,ip,captureDate.getTime(),len,fis); - writer.checkSize(); - long newSize = writer.getPosition(); - long oSize = writer.getFile().length(); - final long arcEndOffset = oSize; - LOGGER.info("Wrote " + url + " at " + arcPath + ":" + arcOffset); - LOGGER.info("NewSize:" + newSize + " oSize: " + oSize); - fis.close(); - - return new ARCLocation() { - private String filename = arcPath; - private long offset = arcOffset; - private long endOffset = arcEndOffset; - - public String getName() { return this.filename; } - public long getOffset() { return this.offset; } - public long getEndOffset() { return this.endOffset; } - - }; - } - private FileRegion storeFile2(File file, ARCWriter writer, String url, - ExtendedGetMethod method) throws IOException { - - FileInputStream fis = new FileInputStream(file); - int len = (int) file.length(); - String mime = method.getMime(); - String ip = method.getRemoteIP(); - Date captureDate = method.getCaptureDate(); - - writer.checkSize(); - final long arcOffset = writer.getPosition(); - final String arcPath 
= writer.getFile().getAbsolutePath(); - - writer.write(url,mime,ip,captureDate.getTime(),len,fis); - writer.checkSize(); - long newSize = writer.getPosition(); - long oSize = writer.getFile().length(); - final long arcEndOffset = oSize; - LOGGER.info("Wrote " + url + " at " + arcPath + ":" + arcOffset); - LOGGER.info("NewSize:" + newSize + " oSize: " + oSize); - fis.close(); - FileRegion fr = new FileRegion(); - fr.file = writer.getFile(); - fr.start = arcOffset; - fr.end = oSize; - return fr; - } - - /** - * Retrieve urlString, and store using ARCWriter, returning - * ARCLocation where the document was stored. - * - * @param cache - * @param urlString - * @return ARCLocation where document was stored - * @throws LiveDocumentNotAvailableException - * @throws URIException - * @throws IOException if something internal went wrong. - */ - public ARCLocation cache(ARCCacheDirectory cache, String urlString) - throws LiveDocumentNotAvailableException, IOException, URIException { - - // localize URL - File tmpFile = getTmpFile(); - ExtendedGetMethod method; - try { - method = urlToFile(urlString,tmpFile); - } catch (LiveDocumentNotAvailableException e) { - LOGGER.info("Attempted to get " + urlString + " failed..."); - tmpFile.delete(); - throw e; - } catch (URIException e) { - tmpFile.delete(); - throw e; - } catch (IOException e) { - tmpFile.delete(); - throw e; - } - - // store URL - ARCLocation location = null; - ARCWriter writer = null; - try { - writer = cache.getWriter(); - location = storeFile(tmpFile, writer, urlString, method); - } catch(IOException e) { - e.printStackTrace(); - throw e; - } finally { - if(writer != null) { - cache.returnWriter(writer); - } - tmpFile.delete(); - } - return location; - } - public FileRegion cache2(ARCCacheDirectory cache, String urlString) - throws LiveDocumentNotAvailableException, IOException, URIException { - - // localize URL - File tmpFile = getTmpFile(); - ExtendedGetMethod method; - try { - method = 
urlToFile(urlString,tmpFile); - } catch (LiveDocumentNotAvailableException e) { - LOGGER.info("Attempted to get " + urlString + " failed..."); - tmpFile.delete(); - throw e; - } catch (URIException e) { - tmpFile.delete(); - throw e; - } catch (IOException e) { - tmpFile.delete(); - throw e; - } - - // store URL - FileRegion region = null; - ARCWriter writer = null; - try { - writer = cache.getWriter(); - region = storeFile2(tmpFile, writer, urlString, method); - } catch(IOException e) { - e.printStackTrace(); - throw e; - } finally { - if(writer != null) { - cache.returnWriter(writer); - } - tmpFile.delete(); - } - return region; -} - - /** - * @param args - */ - public static void main(String[] args) { - int DEFAULT_MAX_ARC_FILE_SIZE = 1024 * 1024 * 100; - File arcDir = new File(args[0]); - URL url; - if(!arcDir.isDirectory()) { - arcDir.mkdir(); - } - File [] files = {arcDir}; - boolean compress = true; - ARCWriter writer = new ARCWriter(new AtomicInteger(), - Arrays.asList(files), "test", compress, - DEFAULT_MAX_ARC_FILE_SIZE); - Properties p = new Properties(); - p.setProperty(ARCCacheDirectory.LIVE_WEB_ARC_DIR, args[0]); - p.setProperty(ARCCacheDirectory.LIVE_WEB_ARC_PREFIX, "test"); - p.setProperty(CACHE_PATH, arcDir.getAbsolutePath()); - - URLCacher uc = new URLCacher(); - ARCCacheDirectory cache = new ARCCacheDirectory(); -// try { -//// cache.init(p); -//// uc.init(p); -// } catch (ConfigurationException e) { -// e.printStackTrace(); -// System.exit(1); -// } - for(int k = 1; k < args.length; k++) { - try { - url = new URL(args[k]); - } catch (MalformedURLException e1) { - e1.printStackTrace(); - continue; - } - try { - uc.cache(cache, url.toString()); - } catch (URIException e) { - e.printStackTrace(); - } catch (LiveDocumentNotAvailableException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } - try { - writer.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - /* - * Get method which stores the 
entire HTTP response: message, headers & body - * in the OutputStream provided, and also provides access to the data needed - * to generate an ARC record: IP, Date and Mime - */ - private class ExtendedGetMethod extends HttpMethodBase { - - private String remoteIP = ""; - private Date captureDate = null; - private String mime = "unk"; - private OutputStream os = null; - - /** - * Constructor - * - * @param os - */ - public ExtendedGetMethod(OutputStream os) { - super(); - this.os = os; - } - - /* (non-Javadoc) - * @see org.apache.commons.httpclient.HttpMethodBase#getName() - */ - public String getName() { - return "GET"; - } - - protected void processStatusLine(HttpState state, HttpConnection conn) { - captureDate = new Date(); - IPStoringHttpConnection bhc = (IPStoringHttpConnection) conn; - remoteIP = bhc.getRemoteIP(); - try { - String statusLine = this.getStatusLine().toString() + "\r\n"; - os.write(statusLine.getBytes()); - } catch (IOException e) { - // TODO hrm..? - e.printStackTrace(); - } - } - - protected void processResponseBody(HttpState state, HttpConnection conn) { - try { - - // copy the HTTP Headers... - Header headers[] = this.getResponseHeaders(); - for (int i = 0; i < headers.length; i++) { - if(headers[i].getName().equals("Content-Type")) { - mime = headers[i].getValue(); - } - os.write(headers[i].toExternalForm().getBytes()); - } - os.write(new String("\r\n").getBytes()); - - // now copy the whole response body: - - InputStream is = this.getResponseStream(); - final int BUFFER_SIZE = 1024 * 4; - byte[] buffer = new byte[BUFFER_SIZE]; - while (true) { - int x = is.read(buffer); - if (x == -1) { - break; - } - os.write(buffer, 0, x); - } - //is.close(); - os.close(); - - } catch (IOException e) { - // TODO don't eat it - e.printStackTrace(); - } - } - - /** - * @return Returns the captureDate. - */ - public Date getCaptureDate() { - return captureDate; - } - - /** - * @return Returns the mime. 
- */ - public String getMime() { - return mime; - } - - /** - * @return Returns the remoteIP. - */ - public String getRemoteIP() { - return remoteIP; - } - - } - - /** - * HttpConnectionManager that returns IPHttpConnection objects, for - * accessing the IP address - */ - private class IPHttpConnectionManager extends SimpleHttpConnectionManager { - public HttpConnection getConnection(HostConfiguration hostConfiguration) { - IPStoringHttpConnection conn = new IPStoringHttpConnection(hostConfiguration); - conn.setHttpConnectionManager(this); - conn.getParams().setDefaults(this.getParams()); - return conn; - } - - public HttpConnection getConnectionWithTimeout( - HostConfiguration hostConfiguration, long timeout) { - // TODO: is this lying? have we really set the time out? - IPStoringHttpConnection conn = new IPStoringHttpConnection(hostConfiguration); - conn.setHttpConnectionManager(this); - conn.getParams().setDefaults(this.getParams()); - return conn; - } - - public HttpConnection getConnection( - HostConfiguration hostConfiguration, long timeout) { - - return new IPStoringHttpConnection(hostConfiguration); - } - public void releaseConnection(HttpConnection conn) { - // ensure connection is closed - conn.close(); - InputStream lastResponse = conn.getLastResponseInputStream(); - if (lastResponse != null) { - conn.setLastResponseInputStream(null); - try { - lastResponse.close(); - } catch (IOException ioe) { - //FIX ME: badness - close to force reconnect. - conn.close(); - } - } - } - } - - /** - * HttpConnection that allows access to the IP address which was - * used for the connection. 
- */ - private class IPStoringHttpConnection extends HttpConnection { - - /** - * @param hc - */ - public IPStoringHttpConnection(HostConfiguration hc) { - super(hc); - } - /** - * @return the remote IP address that was connected to, as a String - */ - public String getRemoteIP() { - return getSocket().getInetAddress().getHostAddress(); - } - } - - /** - * @return the tmpDir - */ - public String getTmpDir() { - if(tmpDir == null) { - return null; - } - return tmpDir.getAbsolutePath(); - } - - /** - * @param tmpDir the tmpDir to set - */ - public void setTmpDir(String tmpDir) { - this.tmpDir = new File(tmpDir); - if(!this.tmpDir.exists()) { - this.tmpDir.mkdirs(); - } - } - -} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-04-14 21:20:49 UTC (rev 3038) @@ -0,0 +1,421 @@ +/* URLtoARCCacher + * + * $Id$: + * + * Created on Mar 26, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.liveweb; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.UnknownHostException; +import java.util.Date; + +import org.apache.commons.httpclient.ConnectTimeoutException; +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.HostConfiguration; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpConnection; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpState; +import org.apache.commons.httpclient.SimpleHttpConnectionManager; +import org.apache.commons.httpclient.URIException; +import org.apache.commons.httpclient.cookie.CookiePolicy; +import org.apache.commons.httpclient.params.HttpClientParams; +import org.apache.log4j.Logger; +import org.archive.httpclient.HttpRecorderGetMethod; +import org.archive.io.RecordingInputStream; +import org.archive.io.arc.ARCWriter; +import org.archive.net.LaxURI; +import org.archive.util.Recorder; +import org.archive.wayback.util.ByteOp; + +/** + * + * Takes an input URL String argument, downloads, stores in an ARCWriter, + * and returns a FileRegion consisting of the compressed ARCRecord containing + * the response, or a forged, "fake error response" ARCRecord which can be + * used to send the content to an OutputStream. 
+ * + * @author brad + * + */ +public class URLtoARCCacher { + private static final Logger LOGGER = Logger.getLogger( + URLtoARCCacher.class.getName()); + + private static String CONTENT_TYPE_HEADER = "Content-Type".toLowerCase(); + private static String GET_METHOD_NAME = "GET"; + + private static String DEFAULT_RECORDER_DIR = "/var/tmp/brad/recorder"; + private File recorderCacheDir = new File(DEFAULT_RECORDER_DIR); + + private static String DEFAULT_BACKING_FILE_BASE = "recorder-tmp"; + private String backingFileBase = DEFAULT_BACKING_FILE_BASE; + private String userAgent = "genericUserAgent"; + private int connectionTimeoutMS = 10000; + private int socketTimeoutMS = 10000; + private int outBufferSize = 1024 * 100; + private int inBufferSize = 1024 * 100; + + private final ThreadLocal<HttpClient> tl = new ThreadLocal<HttpClient>() { + + protected synchronized HttpClient initialValue() { + HttpClient http = new HttpClient(); + IPHttpConnectionManager manager = new IPHttpConnectionManager(); + manager.getParams().setConnectionTimeout(connectionTimeoutMS); + manager.getParams().setSoTimeout(socketTimeoutMS); + http.setHttpConnectionManager(manager); + HttpClientParams clientParams = new HttpClientParams(); + clientParams.setParameter("http.useragent", userAgent); + return http; + } + }; + + private HttpClient getHttpClient() { + return tl.get(); + } + + + private static byte[] ERROR_BYTES = "HTTP 502 Bad Gateway\n\n".getBytes(); + private static String ERROR_MIME = "unk"; + private static String ERROR_IP = "0.0.0.0"; + + /** + * @param url to cache + * @param cache ARCCacheDirectory for storing result or faked result + * @return FileRegion of compressed byte range for ARCRecord. + * @throws IOException for the usual reasons + * @throws URIException if url argument isn't really an URL.. 
+ */ + public FileRegion cacheURL(String url, ARCCacheDirectory cache) + throws IOException, URIException { + + FileRegion region = null; + + // to track if we got a response (any response) or an exception. + boolean gotUrl = false; + + Recorder recorder = new Recorder(recorderCacheDir,backingFileBase, + outBufferSize, inBufferSize); + + ExtendedGetMethod getMethod = null; + + // TWO STEPS: + // first do the GET, using a Recorder to get the response. + // then, if that worked, save the recorded value into an ARC + // and return it's region + // if we didn't get a response, forge a fake record and return that. + try { + Recorder.setHttpRecorder(recorder); + LaxURI lURI = new LaxURI(url,true); + getMethod = new ExtendedGetMethod(url,recorder); + getMethod.setURI(lURI); + HttpClient client = getHttpClient(); + getMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES); + getMethod.setFollowRedirects(false); + int code = client.executeMethod(getMethod); + LOGGER.info("URL(" + url + ") HTTP:" + code); + ByteOp.discardStream(getMethod.getResponseBodyAsStream()); + getMethod.releaseConnection(); + recorder.closeRecorders(); + gotUrl = true; + + } catch (URIException e) { + e.printStackTrace(); + } catch (UnknownHostException e) { + LOGGER.warn("Unknown host for " + url); + } catch (ConnectTimeoutException e) { + // TODO: should we act like it's a full block? + LOGGER.warn("Timeout out connecting to " + url); + } catch (HttpException e) { + e.printStackTrace(); + // we have to let IOExceptions out, problems caused by local disk + // NEED to return errors, indicating that there is not an + // authoritative answer, and thus... NOTHING can be shown. 
+// } catch (IOException e) { +// e.printStackTrace(); + } finally { + Recorder.setHttpRecorder(null); + } + + // now write the content, or a fake record: + ARCWriter writer = null; + try { + writer = cache.getWriter(); + if(gotUrl) { + + RecordingInputStream ris = recorder.getRecordedInput(); + region = storeInputStreamARCRecord(writer, url, + getMethod.getMime(), getMethod.getRemoteIP(), + getMethod.getCaptureDate(), + ris.getReplayInputStream(), (int) ris.getSize()); + + } else { + region = storeNotAvailable(writer, url); + } + + } finally { + if(writer != null) { + cache.returnWriter(writer); + } + } + recorder.close(); + + return region; + } + + private FileRegion storeInputStreamARCRecord(ARCWriter writer, + String url, String mime, String ip, Date captureDate, + InputStream is, int length) throws IOException { + + writer.checkSize(); + final long arcOffset = writer.getPosition(); + final String arcPath = writer.getFile().getAbsolutePath(); + + writer.write(url,mime,ip,captureDate.getTime(),length,is); + writer.checkSize(); +// long newSize = writer.getPosition(); + long oSize = writer.getFile().length(); +// final long arcEndOffset = oSize; + LOGGER.info("Wrote " + url + ": " + arcPath + "(" + arcOffset + + "-" + oSize + ")"); + + FileRegion fr = new FileRegion(); + fr.file = writer.getFile(); + fr.start = arcOffset; + fr.end = oSize; + fr.isFake = false; + return fr; + } + + private FileRegion storeNotAvailable(ARCWriter writer, String url) + throws IOException { + + ByteArrayInputStream bais = new ByteArrayInputStream(ERROR_BYTES); + FileRegion fr = storeInputStreamARCRecord(writer, url, + ERROR_MIME, ERROR_IP, new Date(), bais, ERROR_BYTES.length); + fr.isFake = true; + return fr; + } + + /* + * Get method which ferrets away the Content-Type header, the remote IP + * and remembers when the HTTP Message header was received. 
+ */ + private class ExtendedGetMethod extends HttpRecorderGetMethod { + + /** + * @param uri to be fetched + * @param recorder which is not currently used by base class, but + * we're going to require and send it on anyways. + */ + public ExtendedGetMethod(String uri, Recorder recorder) { + super(uri, recorder); + } + + private String remoteIP = ""; + private Date captureDate = null; + private String mime = "unk"; + + public String getName() { + return GET_METHOD_NAME; + } + + protected void processStatusLine(HttpState state, HttpConnection conn) { + // grab the remote IP, and record when we started getting bytes.. + // Sam thinks we should somehow record how fast we got it back.. + // and then replay it at the same rate we received it. + + captureDate = new Date(); + IPStoringHttpConnection bhc = (IPStoringHttpConnection) conn; + remoteIP = bhc.getRemoteIP(); + } + protected void processResponseBody(HttpState state, HttpConnection conn) { + // grab the mime.. + Header headers[] = this.getResponseHeaders(); + for (int i = 0; i < headers.length; i++) { + String lcHeader = headers[i].getName().toLowerCase(); + if(lcHeader.compareTo(CONTENT_TYPE_HEADER) == 0) { + mime = headers[i].getValue(); + } + } + } + + /** + * @return Returns the captureDate. + */ + public Date getCaptureDate() { + return captureDate; + } + + /** + * @return Returns the mime. + */ + public String getMime() { + return mime; + } + + /** + * @return Returns the remoteIP. 
+ */ + public String getRemoteIP() { + return remoteIP; + } + } + + /** + * HttpConnectionManager that returns IPHttpConnection objects, for + * accessing the IP address + */ + private class IPHttpConnectionManager extends SimpleHttpConnectionManager { + public HttpConnection getConnection(HostConfiguration hostConfiguration) { + IPStoringHttpConnection conn = new IPStoringHttpConnection(hostConfiguration); + conn.setHttpConnectionManager(this); + conn.getParams().setDefaults(this.getParams()); + return conn; + } + + public HttpConnection getConnectionWithTimeout( + HostConfiguration hostConfiguration, long timeout) { + // TODO: is this lying? have we really set the time out? + IPStoringHttpConnection conn = + new IPStoringHttpConnection(hostConfiguration); + conn.setHttpConnectionManager(this); + conn.getParams().setDefaults(this.getParams()); + return conn; + } + + public HttpConnection getConnection( + HostConfiguration hostConfiguration, long timeout) { + + return new IPStoringHttpConnection(hostConfiguration); + } + public void releaseConnection(HttpConnection conn) { + // ensure connection is closed + conn.close(); + InputStream lastResponse = conn.getLastResponseInputStream(); + if (lastResponse != null) { + conn.setLastResponseInputStream(null); + try { + lastResponse.close(); + } catch (IOException ioe) { + //FIX ME: badness - close to force reconnect. + conn.close(); + } + } + } + } + + /** + * HttpConnection that allows access to the IP address which was + * used for the connection. 
+ */ + private class IPStoringHttpConnection extends HttpConnection { + + /** + * @param hc HostConfiguration + */ + public IPStoringHttpConnection(HostConfiguration hc) { + super(hc); + } + /** + * @return the remote IP address that was connected to, as a String + */ + public String getRemoteIP() { + return getSocket().getInetAddress().getHostAddress(); + } + } + + /** + * @return the recorderCacheDir + */ + public String getRecorderCacheDir() { + return recorderCacheDir.getAbsolutePath(); + } + + /** + * @param recorderCacheDirPath the recorderCacheDir to set + */ + public void setRecorderCacheDir(String recorderCacheDirPath) { + this.recorderCacheDir = new File(recorderCacheDirPath); + } + + /** + * @return the backingFileBase + */ + public String getBackingFileBase() { + return backingFileBase; + } + + /** + * @param backingFileBase the backingFileBase to set + */ + public void setBackingFileBase(String backingFileBase) { + this.backingFileBase = backingFileBase; + } + + /** + * @return the userAgent + */ + public String getUserAgent() { + return userAgent; + } + + /** + * @param userAgent the userAgent to set + */ + public void setUserAgent(String userAgent) { + this.userAgent = userAgent; + } + + /** + * @return the connectionTimeoutMS + */ + public int getConnectionTimeoutMS() { + return connectionTimeoutMS; + } + + /** + * @param connectionTimeoutMS the connectionTimeoutMS to set + */ + public void setConnectionTimeoutMS(int connectionTimeoutMS) { + this.connectionTimeoutMS = connectionTimeoutMS; + } + + /** + * @return the socketTimeoutMS + */ + public int getSocketTimeoutMS() { + return socketTimeoutMS; + } + + /** + * @param socketTimeoutMS the socketTimeoutMS to set + */ + public void setSocketTimeoutMS(int socketTimeoutMS) { + this.socketTimeoutMS = socketTimeoutMS; + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 
___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:15:07
|
Revision: 3037 http://archive-access.svn.sourceforge.net/archive-access/?rev=3037&view=rev Author: bradtofel Date: 2010-04-14 21:15:01 +0000 (Wed, 14 Apr 2010) Log Message: ----------- REFACTOR: made this an interface, currently removing Local implementations - relies on squid/varnish to do the real-time caching rather than buggy, slower native implementation. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2010-04-14 21:12:22 UTC (rev 3036) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2010-04-14 21:15:01 UTC (rev 3037) @@ -26,317 +26,41 @@ import java.io.IOException; import java.net.URL; -import java.util.Date; -import org.apache.commons.httpclient.URIException; -import org.apache.log4j.Logger; -import org.archive.io.arc.ARCLocation; -import org.archive.io.arc.ARCRecord; -import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.Resource; -import org.archive.wayback.core.CaptureSearchResult; -import org.archive.wayback.core.CaptureSearchResults; -import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.LiveDocumentNotAvailableException; -import org.archive.wayback.exception.ResourceNotInArchiveException; -import org.archive.wayback.exception.WaybackException; -import org.archive.wayback.resourcestore.indexer.ARCRecordToSearchResultAdapter; -import org.archive.wayback.resourcestore.resourcefile.ArcResource; -import org.archive.wayback.util.Timestamp; -import 
org.archive.wayback.util.url.AggressiveUrlCanonicalizer; +import org.archive.wayback.exception.LiveWebCacheUnavailableException; + /** + * Interface to retrieve Resource objects from the live web. * - * * @author brad * @version $Date$, $Revision$ */ -public class LiveWebCache { - private static final Logger LOGGER = Logger.getLogger( - LiveWebCache.class.getName()); - - private long maxFailedCacheMS = 600000; - private ARCCacheDirectory arcCacheDir = null; - private URLCacher cacher = null; - private LiveWebLocalResourceIndex index = null; - private UrlCanonicalizer canonicalizer = null; - private ARCRecordToSearchResultAdapter adapter = null; - - public LiveWebCache() { - canonicalizer = new AggressiveUrlCanonicalizer(); - adapter = new ARCRecordToSearchResultAdapter(); - adapter.setCanonicalizer(canonicalizer); - } - +public interface LiveWebCache { /** - * closes all resources - */ - public void shutdown() { - arcCacheDir.shutdown(); - } - - private WaybackRequest makeCacheWBRequest(URL url, long maxCacheMS, - boolean bUseOlder) throws URIException { - WaybackRequest req = new WaybackRequest(); - req.setRequestUrl(url.toString()); - req.setReplayRequest(); - req.setReplayTimestamp(Timestamp.currentTimestamp().getDateStr()); - Timestamp earliest = null; - if(bUseOlder) { - earliest = Timestamp.earliestTimestamp(); - } else { - Date d = new Date(System.currentTimeMillis() - maxCacheMS); - earliest = new Timestamp(d); - } - req.setStartTimestamp(earliest.getDateStr()); - // for now, assume all live web requests are only satisfiable by the - // exact host -- no massaging. 
- req.setExactHost(true); - return req; - } - - private boolean isForgedFailRecentEnough(CaptureSearchResult result) { - String captureDate = result.getCaptureTimestamp(); - Timestamp t = new Timestamp(captureDate); - long maxAge = System.currentTimeMillis() - maxFailedCacheMS; - long failAge = t.getDate().getTime(); - if(failAge > maxAge) { - return true; - } - return false; - } - - private boolean isForgedFailedSearchResult(CaptureSearchResult result) { - String arcFile = result.getFile(); - return arcFile.equals("-"); - } - - private CaptureSearchResult forgeFailedSearchResult(URL url) { - CaptureSearchResult result = new CaptureSearchResult(); - - result.setFile("-"); - result.setOffset(0); - - result.setHttpCode("0"); - - result.setDigest("-"); - result.setMimeType("-"); - result.setCaptureDate(new Date()); - - result.setOriginalUrl(url.toString()); - result.setRedirectUrl("-"); - - String indexUrl; - try { - indexUrl = canonicalizer.urlStringToKey(url.toString()); - } catch (URIException e) { - // not gonna happen... 
- e.printStackTrace(); - indexUrl = url.toString(); - } - result.setUrlKey(indexUrl); - - return result; - } - - private Resource getLocalCachedResource(URL url, long maxCacheMS, - boolean bUseOlder) throws ResourceNotInArchiveException, - IOException, LiveDocumentNotAvailableException { - - Resource resource = null; - WaybackRequest wbRequest = makeCacheWBRequest(url,maxCacheMS,bUseOlder); - - CaptureSearchResults results = null; - try { - SearchResults gresults = index.query(wbRequest); - if(!(gresults instanceof CaptureSearchResults)) { - throw new IOException("bad result type..."); - } - results = (CaptureSearchResults) gresults; - } catch (ResourceNotInArchiveException e) { -// e.printStackTrace(); - throw e; - } catch (WaybackException e) { - e.printStackTrace(); - throw new IOException(e.getMessage()); - } - CaptureSearchResult result = results.getClosest(wbRequest); - if(result != null) { - if(isForgedFailedSearchResult(result)) { - if(isForgedFailRecentEnough(result)) { - LOGGER.info(url.toString() + " has failed recently"); - throw new LiveDocumentNotAvailableException("failed prev"); - } else { - LOGGER.info(url.toString() + " failed a while ago"); - throw new ResourceNotInArchiveException("Nope"); - } - } - String name = result.getFile(); - long offset = result.getOffset(); - resource = arcCacheDir.getResource(name, offset); - } - return resource; - } - - private Resource getLiveCachedResource(URL url) - throws LiveDocumentNotAvailableException, IOException { - - Resource resource = null; - - LOGGER.info("Caching URL(" + url.toString() + ")"); - ARCLocation location = null; - try { - location = cacher.cache(arcCacheDir, url.toString()); - } catch(LiveDocumentNotAvailableException e) { - // record the failure, so we can fail early next time: - CaptureSearchResult result = forgeFailedSearchResult(url); - index.addSearchResult(result); - LOGGER.info("Added FAIL-URL(" + url.toString() + ") to LiveIndex"); - throw e; - } - if(location != null) { - String 
name = location.getName(); - long offset = location.getOffset(); - LOGGER.info("Cached URL(" + url.toString() + ") in " + - "ARC(" + name + ") at (" + offset + ")"); - resource = arcCacheDir.getResource(name, offset); - // add the result to the index: - if(resource instanceof ArcResource) { - ArcResource aResource = (ArcResource) resource; - ARCRecord record = (ARCRecord) aResource.getArcRecord(); - - CaptureSearchResult result = adapter.adapt(record); - // HACKHACK: we're getting the wrong offset from the ARCReader: - result.setOffset(offset); - index.addSearchResult(result); - LOGGER.info("Added URL(" + url.toString() + ") in " + - "ARC(" + name + ") at (" + offset + ") to LiveIndex"); - - // we just read thru the doc in order to index it. Reset: - resource = arcCacheDir.getResource(name, offset); - } - - } - - return resource; - } - - /** - * @param url - * @param maxCacheMS - * @param bUseOlder + * Fetch a Resource from the live web, or from a cache of the live web. + * + * @param url to fetch from the live web. + * @param maxCacheMS maximum age of resource to return - optionally honored + * @param bUseOlder if true, return documents older than maxCacheMS if + * a more recent copy is not available. 
* @return Resource for url * - * @throws LiveDocumentNotAvailableException - * @throws IOException + * @throws LiveDocumentNotAvailableException if the resource cannot be + * retrieved from the live web, but all proxying and caching + * mechanisms functioned properly + * @throws LiveWebCacheUnavailableException if there was a problem either + * accessing the live web, in proxying to the live web, or in + * maintaining the cache for the live web + * @throws IOException for the usual reasons */ public Resource getCachedResource(URL url, long maxCacheMS, - boolean bUseOlder) throws LiveDocumentNotAvailableException, - IOException { - - Resource resource = null; - try { - resource = getLocalCachedResource(url, maxCacheMS, false); - LOGGER.info("Using Cached URL(" + url.toString() + ")"); - - } catch(ResourceNotInArchiveException e) { - try { - LOGGER.info("URL:" + url.toString() + " has not been cached" - + " recently enough. Attempting from Live Web"); - - resource = getLiveCachedResource(url); - - } catch (LiveDocumentNotAvailableException e1) { - if(bUseOlder) { - // we don't have a copy that satisfies the "ideal" maxAge, - // but the file isn't on the live web, and the caller has - // asked to use an older cached copy if a fresh one isn't - // available. - LOGGER.info("Second Cached attempt for URL(" + - url.toString() + ") allowing older..."); - try { - resource = getLocalCachedResource(url, maxCacheMS, true); - } catch (ResourceNotInArchiveException e2) { - LOGGER.info("Unable to live-get and older" + - " is not in cache...throwing LDNAE"); - // rethrow the original... - throw e1; - } - LOGGER.info("Got older version of Cached URL(" + - url.toString() + ")"); - } else { - LOGGER.info("Unable to live-get...throwing LDNAE"); - // rethrow the original... 
- throw e1; - } - } - } - return resource; - } - + boolean bUseOlder) throws LiveDocumentNotAvailableException, + LiveWebCacheUnavailableException, IOException; /** - * @return the maxFailedCacheMS + * closes all resources */ - public long getMaxFailedCacheMS() { - return maxFailedCacheMS; - } - - /** - * @param maxFailedCacheMS the maxFailedCacheMS to set - */ - public void setMaxFailedCacheMS(long maxFailedCacheMS) { - this.maxFailedCacheMS = maxFailedCacheMS; - } - - /** - * @return the arcCacheDir - */ - public ARCCacheDirectory getArcCacheDir() { - return arcCacheDir; - } - - /** - * @param arcCacheDir the arcCacheDir to set - */ - public void setArcCacheDir(ARCCacheDirectory arcCacheDir) { - this.arcCacheDir = arcCacheDir; - } - - /** - * @return the cacher - */ - public URLCacher getCacher() { - return cacher; - } - - /** - * @param cacher the cacher to set - */ - public void setCacher(URLCacher cacher) { - this.cacher = cacher; - } - - /** - * @return the index - */ - public LiveWebLocalResourceIndex getIndex() { - return index; - } - - /** - * @param index the index to set - */ - public void setIndex(LiveWebLocalResourceIndex index) { - this.index = index; - } - - public UrlCanonicalizer getCanonicalizer() { - return canonicalizer; - } - - public void setCanonicalizer(UrlCanonicalizer canonicalizer) { - this.canonicalizer = canonicalizer; - adapter.setCanonicalizer(canonicalizer); - } + public void shutdown(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:12:29
|
Revision: 3036 http://archive-access.svn.sourceforge.net/archive-access/?rev=3036&view=rev Author: bradtofel Date: 2010-04-14 21:12:22 +0000 (Wed, 14 Apr 2010) Log Message: ----------- FEATURE: exposed much of the previously hard-coded configuration via getters and setters JAVADOC: added more-or-less complete javadoc Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2010-04-14 21:05:13 UTC (rev 3035) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCacheDirectory.java 2010-04-14 21:12:22 UTC (rev 3036) @@ -30,6 +30,7 @@ import java.util.List; import org.apache.log4j.Logger; +import org.archive.io.ArchiveFileConstants; import org.archive.io.WriterPoolSettings; import org.archive.io.arc.ARCConstants; import org.archive.io.arc.ARCWriter; @@ -40,12 +41,11 @@ import org.archive.wayback.util.DirMaker; /** - * Class which manages a growing set of ARC files, managed by an ARCWriterPool. + * Class uses an ARCWriterPool to provide access to a set of ARCWriters, + * exposing getters and setters which simplify Spring configuration. * - * Clients can grab an ARCWriter that they use to append to one of the ARC - * files. - * - * This class also transforms ARCLocations into ARCRecords, using an ARCReader. + * This class also provides one method, getResource() which transforms a String + * path and a long offset into a Resource.
* * @author brad * @version $Date$, $Revision$ @@ -54,33 +54,23 @@ private static final Logger LOGGER = Logger.getLogger( ARCCacheDirectory.class.getName()); - private final static int MAX_POOL_WRITERS = 5; - - private final static int MAX_POOL_WAIT = 60 * 1000; - - private static final String OPEN_SUFFIX = ".open"; - /** - * directory where live generated ARCs are stored - */ - public static final String LIVE_WEB_ARC_DIR = "liveweb.arc.dir"; - - /** - * prefeix for live generated ARC files. - */ - public static final String LIVE_WEB_ARC_PREFIX = "liveweb.arc.prefix"; - private ARCWriterPool pool = null; + private int poolWriters = 5; + private int maxPoolWait = 60 * 1000; + private long maxARCSize = ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE; private String arcPrefix = "wayback-live"; + private File arcDir = null; + private ARCWriterPool pool = null; /** - * @throws IOException + * @throws IOException for usual reasons */ public void init() throws IOException { // TODO: check that all props have been set arcDir = DirMaker.ensureDir(arcDir.getAbsolutePath(),"arcPath"); File[] files = { arcDir }; WriterPoolSettings settings = getSettings(true, arcPrefix, files); - pool = new ARCWriterPool(settings, MAX_POOL_WRITERS, MAX_POOL_WAIT); + pool = new ARCWriterPool(settings, poolWriters, maxPoolWait); } /** @@ -94,7 +84,7 @@ * get an ARCWriter. be sure to return it to the pool with returnWriter. * * @return an ARCWriter prepared to store an ARCRecord - * @throws IOException + * @throws IOException for usual reasons */ public ARCWriter getWriter() throws IOException { return (ARCWriter) pool.borrowFile(); @@ -102,7 +92,7 @@ /** * @param w previously borrowed ARCWriter - * @throws IOException + * @throws IOException for usual reasons */ public void returnWriter(ARCWriter w) throws IOException { pool.returnFile(w); @@ -111,10 +101,10 @@ /** * transform an ARCLocation into a Resource. Be sure to call close() on it * when processing is finished. 
- * @param path - * @param offset + * @param path to ARC file + * @param offset within file where record begins * @return the Resource for the location - * @throws IOException + * @throws IOException for usual reasons */ public Resource getResource(String path, long offset) throws IOException { File arc = new File(path); @@ -122,9 +112,9 @@ String base = arc.getName(); arc = new File(arcDir,base); if(!arc.exists()) { - if(base.endsWith(OPEN_SUFFIX)) { + if(base.endsWith(ArchiveFileConstants.OCCUPIED_SUFFIX)) { String newBase = base.substring(0,base.length() - - OPEN_SUFFIX.length()); + ArchiveFileConstants.OCCUPIED_SUFFIX.length()); arc = new File(arcDir,newBase); } } @@ -142,7 +132,7 @@ final String prefix, final File[] arcDirs) { return new WriterPoolSettings() { public long getMaxSize() { - return ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE; + return maxARCSize; } public List<File> getOutputDirs() { @@ -163,7 +153,6 @@ } public String getSuffix() { - // TODO: is correct? return null; } }; @@ -196,4 +185,46 @@ public void setArcDir(String arcPath) { this.arcDir = new File(arcPath); } + + /** + * @return the poolWriters + */ + public int getPoolWriters() { + return poolWriters; + } + + /** + * @param poolWriters the poolWriters to set + */ + public void setPoolWriters(int poolWriters) { + this.poolWriters = poolWriters; + } + + /** + * @return the maxPoolWait + */ + public int getMaxPoolWait() { + return maxPoolWait; + } + + /** + * @param maxPoolWait the maxPoolWait to set + */ + public void setMaxPoolWait(int maxPoolWait) { + this.maxPoolWait = maxPoolWait; + } + + /** + * @return the maxARCSize + */ + public long getMaxARCSize() { + return maxARCSize; + } + + /** + * @param maxARCSize the maxARCSize to set + */ + public void setMaxARCSize(long maxARCSize) { + this.maxARCSize = maxARCSize; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 21:05:19
|
Revision: 3035 http://archive-access.svn.sourceforge.net/archive-access/?rev=3035&view=rev Author: bradtofel Date: 2010-04-14 21:05:13 +0000 (Wed, 14 Apr 2010) Log Message: ----------- FEATURE: added live web request flag Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-04-14 18:44:53 UTC (rev 3034) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-04-14 21:05:13 UTC (rev 3035) @@ -207,6 +207,11 @@ * scheme as that specified in REQUEST_URL. */ public static final String REQUEST_EXACT_SCHEME_ONLY = "requestexactscheme"; + + /** + * Indicates the user requested content proxied from the live web. + */ + public static final String REQUEST_IS_LIVE_WEB = "requestliveweb"; /** * indicates positive value for any request boolean flag. @@ -720,6 +725,13 @@ public boolean isExactScheme() { return getBoolean(REQUEST_EXACT_SCHEME_ONLY); } + + public void setLiveWebRequest(boolean isLiveWebRequest) { + setBoolean(REQUEST_IS_LIVE_WEB,isLiveWebRequest); + } + public boolean isLiveWebRequest() { + return getBoolean(REQUEST_IS_LIVE_WEB); + } public String getAnchorTimestamp() { return get(REQUEST_ANCHOR_DATE); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 18:44:59
|
Revision: 3034 http://archive-access.svn.sourceforge.net/archive-access/?rev=3034&view=rev Author: bradtofel Date: 2010-04-14 18:44:53 +0000 (Wed, 14 Apr 2010) Log Message: ----------- BUGFIX(unreported): chunked peek-ahead was looking for 10-13 line-ending, not 13-10. Now it allows either, as well as a lone 10. COMMENT/JAVADOC update Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-14 02:33:08 UTC (rev 3033) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-14 18:44:53 UTC (rev 3034) @@ -34,6 +34,13 @@ * Abstraction on top of a document stored in a WaybackCollection. Currently * implemented subclasses include ArcResource and WarcResource. * + * This implementation needs some pretty drastic refactoring.. May have to wait + * for 2.0. This should be a byte-oriented record, and allow wrapping the + * interior byte-stream in one of the more full featured HTTP libraries + * (jetty/apache-http-client/w3c-http-reference). + * + * For now, it is a system-wide assumption that all resources are HTTP based.
+ * * @author Brad Tofel * @version $Date$, $Revision$ */ @@ -42,8 +49,20 @@ private InputStream is; public abstract void close() throws IOException; + /** + * Assumes an HTTP resource - return the HTTP response code + * @return the HTTP response code from the HTTP message + */ public abstract int getStatusCode(); + /** + * @return the size in bytes of the record payload, including HTTP header + */ public abstract long getRecordLength(); + /** + * Assumes an HTTP response - return the HTTP headers, not including the + * HTTP Message header + * @return key-value Map of HTTP headers + */ public abstract Map<String,String> getHttpHeaders(); private void validate() throws IOException { @@ -59,10 +78,13 @@ this.is = new BufferedInputStream(is); } } + /** * indicate that there is a Transfer-Encoding: chunked header, so the input - * data should be dechunked as it is read. - * @throws IOException + * data should be dechunked as it is read. This method actually peeks + * ahead to verify that there is a hex-encoded chunk length before + * assuming the data is chunked. + * @throws IOException for usual reasons */ public void setChunkedEncoding() throws IOException { validate(); @@ -70,21 +92,31 @@ int max = 50; is.mark(max+2); int cur = 0; + int hexFound = 0; boolean isChunked = false; while(cur < max) { int nextC = is.read(); - if(nextC == 10) { + // allow CRLF and plain ole LF: + if((nextC == 13) || (nextC == 10)) { // must have read at least 1 hex char: - if(cur > 0) { + if(hexFound > 0) { + if(nextC == 10) { + isChunked = true; + break; + } nextC = is.read(); - if(nextC == 13) { + if(nextC == 10) { isChunked = true; break; } } + // keep looking to allow some blank lines. } else { // better be a hex character: - if(!isHex(nextC)) { + if(isHex(nextC)) { + hexFound++; + } else { + // not a hex digit: not a chunked stream. 
break; } } @@ -108,80 +140,45 @@ } return false; } - - /** - * @return - * @throws IOException - * @see java.io.BufferedInputStream#available() - */ + public int available() throws IOException { validate(); return is.available(); } - /** - * @param readlimit - * @see java.io.BufferedInputStream#mark(int) - */ + public void mark(int readlimit) { if(is != null) { is.mark(readlimit); } } - /** - * @return - * @see java.io.BufferedInputStream#markSupported() - */ + public boolean markSupported() { if(is == null) { return false; } return is.markSupported(); } - /** - * @return - * @throws IOException - * @see java.io.BufferedInputStream#read() - */ + public int read() throws IOException { validate(); return is.read(); } - /** - * @param b - * @param off - * @param len - * @return - * @throws IOException - * @see java.io.BufferedInputStream#read(byte[], int, int) - */ + public int read(byte[] b, int off, int len) throws IOException { validate(); return is.read(b, off, len); } - /** - * @param b - * @return - * @throws IOException - * @see java.io.FilterInputStream#read(byte[]) - */ + public int read(byte[] b) throws IOException { validate(); return is.read(b); } - /** - * @throws IOException - * @see java.io.BufferedInputStream#reset() - */ + public void reset() throws IOException { validate(); is.reset(); } - /** - * @param n - * @return - * @throws IOException - * @see java.io.BufferedInputStream#skip(long) - */ + public long skip(long n) throws IOException { validate(); return is.skip(n); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 03:10:30
|
Revision: 3033 http://archive-access.svn.sourceforge.net/archive-access/?rev=3033&view=rev Author: bradtofel Date: 2010-04-14 02:33:08 +0000 (Wed, 14 Apr 2010) Log Message: ----------- JAVADOC: for all public methods Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2010-04-10 02:41:32 UTC (rev 3032) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2010-04-14 02:33:08 UTC (rev 3033) @@ -88,28 +88,75 @@ // Present for... requests that resulted in an expected Exception. private WaybackException exception = null; + /** + * Constructor for a "generic" UIResults, where little/no context is + * available. Likely used for static UI requests, and more specifically, for + * template .jsp files, including header/footer .jsps. These may be called + * in multiple contexts, but don't expect much data to be avaialble beyond + * the AccessPoint that handled the request. + * @param wbRequest WaybackRequest with some or no information + * @param uriConverter the ResultURIConveter to use with the AccessPoint + * handling the request. + */ public UIResults(WaybackRequest wbRequest,ResultURIConverter uriConverter) { this.wbRequest = wbRequest; this.uriConverter = uriConverter; } + + /** + * Constructor for a "exception" UIResults, where little/no context is + * available. Likely used for exception rendering .jsp files. + * @param wbRequest WaybackRequest with some or no information, but at + * least the AccessPoint that handled the request. + * @param uriConverter the ResultURIConveter to use with the AccessPoint + * handling the request. 
+ * @param exception WaybackException to be rendered. + */ public UIResults(WaybackRequest wbRequest, ResultURIConverter uriConverter, WaybackException exception) { this.wbRequest = wbRequest; this.uriConverter = uriConverter; this.exception = exception; } + /** + * Constructor for "Url Query" UIResults, where the request successfully + * matched something from the index. Used to hand off search results and + * context to the query rendering .jsp files. + * @param wbRequest WaybackRequest with a valid request + * @param uriConverter the ResultURIConveter to use with the AccessPoint + * handling the request. + * @param captureResults CaptureSearchResults object with matching data. + */ public UIResults(WaybackRequest wbRequest, ResultURIConverter uriConverter, CaptureSearchResults captureResults) { this.wbRequest = wbRequest; this.uriConverter = uriConverter; this.captureResults = captureResults; } + /** + * Constructor for "Url Prefix Query" UIResults, where the request + * successfully matched something from the index. Used to hand off search + * results and context to the query rendering .jsp files. + * @param wbRequest WaybackRequest with a valid request + * @param uriConverter the ResultURIConveter to use with the AccessPoint + * @param urlResults UrlSearchResults object with matching data. + */ public UIResults(WaybackRequest wbRequest, ResultURIConverter uriConverter, UrlSearchResults urlResults) { this.wbRequest = wbRequest; this.uriConverter = uriConverter; this.urlResults = urlResults; } + /** + * Constructor for "Replay" UIResults, where the request + * successfully matched something from the index, the document was retrieved + * from the ResourceStore, and is going to be shown to the user. + * @param wbRequest WaybackRequest with some or no information + * @param uriConverter the ResultURIConveter to use with the AccessPoint + * @param captureResults CaptureSearchResults object with matching data. 
+ * @param result the specific CaptureSearchResult being replayed + * @param resource the actual Resource being replayed + */ public UIResults(WaybackRequest wbRequest, ResultURIConverter uriConverter, CaptureSearchResults captureResults, CaptureSearchResult result, Resource resource) { @@ -123,12 +170,6 @@ /* * GENERAL GETTERS: */ - /** - * @return the uriConverter - */ - public ResultURIConverter getUriConverter() { - return uriConverter; - } /** * @return Returns the wbRequest. @@ -182,6 +223,10 @@ return contentJsp; } + /** + * @return the original URL as recieved by Wayback, before forwarding to + * a .jsp + */ public String getOriginalRequestURL() { return originalRequestURL; } @@ -191,7 +236,9 @@ */ /** - * @param url + * Create a self-referencing URL that will perform a query for all copies + * of the given URL. + * @param url to search for copies of * @return String url that will make a query for all captures of an URL. */ public String makeCaptureQueryUrl(String url) { @@ -202,8 +249,11 @@ return newWBR.getContextPrefix() + "query?" + newWBR.getQueryArguments(1); } + /** - * @param configName + * Get a String generic AccessPoint config (ala AccessPoint.configs Spring + * config) + * @param configName key for configuration property * @return String configuration for the context, if present, otherwise null */ public String getContextConfig(final String configName) { @@ -217,8 +267,10 @@ } return configValue; } + /** - * @param result + * Create a replay URL for the given CaptureSearchResult + * @param result CaptureSearchResult to replay * @return URL string that will replay the specified Resource Result. */ public String resultToReplayUrl(CaptureSearchResult result) { @@ -231,7 +283,9 @@ } /** - * @param pageNum + * Create a self-referencing URL that will drive to the given page, + * simplifying rendering pagination + * @param pageNum page number of results to link to. 
* @return String URL which will drive browser to search results for a * different page of results for the same query */ @@ -245,24 +299,28 @@ * FORWARD TO A .JSP */ - /** - * Store this UIResults in the HttpServletRequest argument. - * @param httpRequest - * @param contentJsp - */ - public void storeInRequest(HttpServletRequest httpRequest, - String contentJsp) { - this.contentJsp = contentJsp; - this.originalRequestURL = httpRequest.getRequestURL().toString(); - httpRequest.setAttribute(FERRET_NAME, this); - } +// /** +// * Store this UIResults in the HttpServletRequest argument. +// * @param httpRequest the HttpServletRequest to store this UIResults in. +// * @param contentJsp th +// */ +// public void storeInRequest(HttpServletRequest httpRequest, +// String contentJsp) { +// this.contentJsp = contentJsp; +// this.originalRequestURL = httpRequest.getRequestURL().toString(); +// httpRequest.setAttribute(FERRET_NAME, this); +// } /** - * @param request - * @param response - * @param targt - * @throws ServletException - * @throws IOException + * Store this UIResults object in the given HttpServletRequest, then + * forward the request to target, which should be a .jsp capable of drawing + * the information stored in this object. + * @param request the HttpServletRequest + * @param response the HttpServletResponse + * @param target the String path to the .jsp to handle drawing the data, + * relative to the contextRoot (ex. "/WEB-INF/query/foo.jsp") + * @throws ServletException for usual reasons... + * @throws IOException for usual reasons... */ public void forward(HttpServletRequest request, HttpServletResponse response, final String target) @@ -282,7 +340,10 @@ * EXTRACT FROM HttpServletRequest */ /** - * @param httpRequest + * Extract a generic UIResults from the HttpServletRequest. Probably used + * by a header/footer template .jsp file. 
+ * @param httpRequest the HttpServletRequest where the UIResults was + * ferreted away * @return generic UIResult with info from httpRequest applied. */ public static UIResults getGeneric(HttpServletRequest httpRequest) { @@ -296,9 +357,13 @@ } /** - * @param httpRequest - * @return UIResults from httpRequest - * @throws ServletException + * Extract an Exception UIResults from the HttpServletRequest. Probably used + * by a .jsp responsible for actual drawing errors for the user. + * @param httpRequest the HttpServletRequest where the UIResults was + * ferreted away + * @return Exception UIResult with info from httpRequest applied. + * @throws ServletException if expected information is not available. Likely + * means a programming bug, or a configuration problem. */ public static UIResults extractException(HttpServletRequest httpRequest) throws ServletException { @@ -319,9 +384,14 @@ return results; } /** - * @param httpRequest - * @return UIResults from httpRequest - * @throws ServletException + * Extract a CaptureQuery UIResults from the HttpServletRequest. Probably + * used by a .jsp responsible for actually drawing search results for the + * user. + * @param httpRequest the HttpServletRequest where the UIResults was + * ferreted away + * @return CaptureQuery UIResult with info from httpRequest applied. + * @throws ServletException if expected information is not available. Likely + * means a programming bug, or a configuration problem. */ public static UIResults extractCaptureQuery(HttpServletRequest httpRequest) throws ServletException { @@ -342,9 +412,14 @@ return results; } /** - * @param httpRequest - * @return UIResults from httpRequest - * @throws ServletException + * Extract a UrlQuery UIResults from the HttpServletRequest. Probably + * used by a .jsp responsible for actually drawing search results for the + * user. 
+ * @param httpRequest the HttpServletRequest where the UIResults was + * ferreted away + * @return UrlQuery UIResult with info from httpRequest applied. + * @throws ServletException if expected information is not available. Likely + * means a programming bug, or a configuration problem. */ public static UIResults extractUrlQuery(HttpServletRequest httpRequest) throws ServletException { @@ -365,9 +440,14 @@ return results; } /** - * @param httpRequest - * @return UIResults from httpRequest - * @throws ServletException + * Extract a Replay UIResults from the HttpServletRequest. Probably + * used by a .jsp insert, responsible for rendering content into replayed + * Resources to enhance the Replay experience. + * @param httpRequest the HttpServletRequest where the UIResults was + * ferreted away + * @return Replay UIResult with info from httpRequest applied. + * @throws ServletException if expected information is not available. Likely + * means a programming bug, or a configuration problem. 
*/ public static UIResults extractReplay(HttpServletRequest httpRequest) throws ServletException { @@ -400,6 +480,13 @@ * STATIC CONVENIENCE METHODS */ + /** + * @return the uriConverter + * @deprecated use getURIConverter() + */ + public ResultURIConverter getUriConverter() { + return uriConverter; + } private static void replaceAll(StringBuffer s, final String o, final String n) { @@ -414,8 +501,9 @@ /** * return a string appropriate for inclusion as an XML tag - * @param tagName + * @param tagName raw string to be encoded * @return encoded tagName + * @deprecated use getFormatter().escapeHtml(String) */ public static String encodeXMLEntity(final String tagName) { StringBuffer encoded = new StringBuffer(tagName); @@ -430,8 +518,9 @@ /** * return a string appropriate for inclusion as an XML tag - * @param content + * @param content to escape * @return encoded content + * @deprecated use getFormatter().escapeHtml(String) */ public static String encodeXMLContent(final String content) { StringBuffer encoded = new StringBuffer(content); @@ -447,8 +536,9 @@ /** * return a string appropriate for inclusion as an XML tag - * @param content + * @param content to encode * @return encoded content + * @deprecated use getFormatter().escapeHtml(String) */ public static String encodeXMLEntityQuote(final String content) { StringBuffer encoded = new StringBuffer(content); @@ -465,7 +555,7 @@ /** * @return URL that points to the root of the current WaybackContext - * @deprecated + * @deprecated use getWbRequest().getContextPrefix() */ public String getContextPrefix() { return getWbRequest().getContextPrefix(); @@ -473,7 +563,7 @@ /** * @return StringFormatter localized to user request - * @deprecated + * @deprecated use getWbRequest().getFormatter() */ public StringFormatter getFormatter() { return getWbRequest().getFormatter(); @@ -481,7 +571,7 @@ /** * @return URL that points to the root of the Server - * @deprecated + * @deprecated use getWbRequest().getServerPrefix() */ public 
String getServerPrefix() { return getWbRequest().getServerPrefix(); @@ -489,17 +579,18 @@ /** * @param contentJsp the contentJsp to set - * @deprecated + * @deprecated use forward() */ public void setContentJsp(String contentJsp) { this.contentJsp = contentJsp; } /** - * @param url - * @param timestamp + * @param url to replay + * @param timestamp to replay * @return String url that will replay the url at timestamp - * @deprecated + * @deprecated use resultToReplayUrl(CaptureSearchResult) or + * getURIConverter.makeReplayURI() */ public String makeReplayUrl(String url, String timestamp) { if(uriConverter == null) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-10 02:41:38
|
Revision: 3032 http://archive-access.svn.sourceforge.net/archive-access/?rev=3032&view=rev Author: bradtofel Date: 2010-04-10 02:41:32 +0000 (Sat, 10 Apr 2010) Log Message: ----------- INITIAL REV: Code which simplifies partitioning Date-related types into partitions of human-logical sizes, Day, Hour, Two Weeks, Year, etc. Users must provide a class which maps their instance-specific type to a Date, and which adds their type to a partition. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/ElementPartitionMap.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partition.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/PartitionSize.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/DayPartitionSize.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/HourPartitionSize.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/MonthPartitionSize.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoMonthPartitionSize.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoYearPartitionSize.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/WeekPartitionSize.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/YearPartitionSize.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/ElementPartitionMap.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/ElementPartitionMap.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/ElementPartitionMap.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,59 @@ +/* ElementPartitionMap + * + * $Id$: + * + * Created on Apr 8, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.util.partition; + +import java.util.Date; + +/** + * @author brad + * @param <T> Specific Class which can be mapped to a Date, and added to a + * Partition + * + */ +public interface ElementPartitionMap<T> { + /** + * Convert an element to a Date ex: + * + * return element.getDate(); + * + * @param element the element to convert + * @return the Date for the element + */ + public Date elementToDate(T element); + + /** + * Add the element to a partition, possibly modifying the Partition in some + * way. ex: + * + * partition.add(element); + * partition.addTotal(1); + * + * @param element to be added + * @param partition to which the element should be added + */ + public void addElementToPartition(T element, Partition<T> partition); + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/ElementPartitionMap.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partition.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partition.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partition.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,126 @@ +package org.archive.wayback.util.partition; + +import java.util.ArrayList; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +/** + * A class which holds elements of some type for a particular Date range. 
+ * + * This class also has two additional application-usable fields: + + * containsClosest: boolean - tracks whether this Partition holds the + * "closest" element of interest to an application + * + * total: int - independent counter for total internal application-level + * elements, useful when nesting partitions, to track the sum-of-totals + * of interior partitions + * + * @author brad + * + * @param <T> Generic type which this partition holds. + */ +public class Partition<T> { + + private Date start = null; + private Date end = null; + private List<T> list = null; + private boolean containsClosest = false; + private int total = 0; + + /** + * Create a Partition for holding elements between the two argument Date + * objects. + * @param start Date representing the start of elements held in this + * Partition, inclusive. + * @param end Date representing the end of elements held in this Partition, + * exclusive. + */ + public Partition(Date start, Date end) { + this.start = start; + this.end = new Date(end.getTime()-1); + list = new ArrayList<T>(); + total = 0; + } + + /** + * Checks if a date is within this partition + * @param d Date to check + * @return boolean true if d is >= start, and < end + */ + public boolean containsDate(Date d) { + return (start.compareTo(d) <= 0) && + (end.compareTo(d) > 0); + } + + /** + * @return the start Date for this Partition. + */ + public Date getStart() { + return start; + } + + /** + * @return the end Date for this Partition. + */ + public Date getEnd() { + return end; + } + + /** + * @return number of elements held in this Partition. + */ + public int count() { + return list.size(); + } + + /** + * @param o element to be added to this partition. + */ + public void add(T o) { + list.add(o); + } + + /** + * @return an Iterator of elements held in this Partition. + */ + public Iterator<T> iterator() { + return list.iterator(); + } + + /** + * @return a List of the elements held in this Partition. 
+ */ + public List<T> list() { + return list; + } + + /** + * @return the containsClosest + */ + public boolean isContainsClosest() { + return containsClosest; + } + + /** + * @param containsClosest the containsClosest to set + */ + public void setContainsClosest(boolean containsClosest) { + this.containsClosest = containsClosest; + } + + /** + * Add an int to the Total count for this partition. + * @param numberToAdd number to add + */ + public void addTotal(int numberToAdd) { + total += numberToAdd; + } + /** + * @return the Total count for this partition. + */ + public int getTotal() { + return total; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partition.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/PartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/PartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/PartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,137 @@ +package org.archive.wayback.util.partition; + +import java.util.Calendar; + +/** + * A class which simplifies partitioning Dates based on human logical time + * intervals: Day, Week, TwoYear. + * + * Implementing classes provide methods to align Calendars at the start of the + * nearest Day, Month, Week, etc. + * + * In addition, implementors provide methods to create new Calendars based on + * their specific alignment size. + * + * @author brad + * + */ +public interface PartitionSize { + + /** + * number of milliseconds in a second.. 
+ */ + public final static long MS_IN_SEC = 1000; + /** + * seconds in a non-leap-second hour + */ + public final static long SEC_IN_HOUR = 3600; + /** + * hours in a day: 24 + */ + public final static long HOUR_IN_DAY = 24; + /** + * days in a 7 day week... what color was his white horse? + */ + public final static long DAY_IN_WEEK = 7; + /** + * approximate days in one month, that is, 30 days + */ + public final static long DAY_IN_MONTH = 30; + /** + * days in one year, assuming a non-leap year + */ + public final static long DAY_IN_YEAR = 365; + + /** + * milliseconds in 1 hour (approximate: no leap second accounted for) + */ + public final static long MS_IN_HOUR = MS_IN_SEC * SEC_IN_HOUR; + /** + * milliseconds in 1 day (approximate: no leap second accounted for) + */ + public final static long MS_IN_DAY = MS_IN_HOUR * HOUR_IN_DAY; + /** + * milliseconds in 7 days (approximate: no leap second accounted for) + */ + public final static long MS_IN_WEEK = MS_IN_DAY * DAY_IN_WEEK; + /** + * milliseconds in one month (approximate: no leap day/sec accounted for, + * and assumes 30 days in a month) + */ + public final static long MS_IN_MONTH = MS_IN_DAY * DAY_IN_MONTH; + /** + * milliseconds in two months (approximate: no leap day/sec accounted for, + * and assumes 30 day months) + */ + public final static long MS_IN_TWO_MONTH = MS_IN_MONTH * 2; + /** + * milliseconds in one year (approximate: no leap day/sec accounted for) + */ + public final static long MS_IN_YEAR = MS_IN_DAY * DAY_IN_YEAR; + /** + * milliseconds in two years (approximate: no leap day/sec accounted for) + */ + public final static long MS_IN_TWO_YEAR = MS_IN_YEAR * 2; + + /** + * + */ + public final static String HOUR_NAME = "hour"; + /** + * + */ + public final static String DAY_NAME = "day"; + /** + * + */ + public final static String WEEK_NAME = "week"; + /** + * + */ + public final static String MONTH_NAME = "month"; + /** + * + */ + public final static String TWO_MONTH_NAME = "twomonth"; + /** + 
* + */ + public final static String YEAR_NAME = "year"; + /** + * + */ + public final static String TWO_YEAR_NAME = "twoyear"; + + /** + * Align the calendar argument to the start of the interval covered by + * this size. Calling this method on a DayPartitionSize will align the + * Calendar to the beginning of the Day in which the Calendar's Date object + * falls within. + * @param in Calendar object which has internal Date set + */ + public void alignStart(Calendar in); + + /** + * Create a new Calendar object, aligned relative to the Calendar argument, + * either forward or backward some number of partitions. + * @param start the returned Calendar will be aligned one day, week, month, + * etc. ahead or behind of this Calendar argument. + * @param offset the relative distance to move the returned calendar + * relative to the argument Calendar. + * @return a new Calendar aligned relative to the start Calendar. + */ + public Calendar increment(Calendar start, int offset); + + /** + * @return the estimated number of milliseconds covered by this + * PartitionSize. Note that this is estimated because of different number of + * days in a month, leap days, leap seconds, etc. + */ + public long intervalMS(); + + /** + * @return the name of this PartitionSize. Likely useful for localized + * lookup of human readable text from a properties file. 
+ */ + public String name(); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/PartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,313 @@ +package org.archive.wayback.util.partition; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.TimeZone; + +import org.apache.log4j.Logger; +import org.archive.wayback.util.partition.size.DayPartitionSize; +import org.archive.wayback.util.partition.size.HourPartitionSize; +import org.archive.wayback.util.partition.size.MonthPartitionSize; +import org.archive.wayback.util.partition.size.TwoMonthPartitionSize; +import org.archive.wayback.util.partition.size.TwoYearPartitionSize; +import org.archive.wayback.util.partition.size.WeekPartitionSize; +import org.archive.wayback.util.partition.size.YearPartitionSize; + +/** + * Class which divides a set of date-related objects into sub-sets by time + * ranges. 
+ * + * This class provides methods for: + * + * 1) determining the smallest PartitionSize that can be used to cover a time + * range, using at most a set number of partitions + * 2) creating a List of Partition objects covering a span of time, each having + * a specified size + * 3) efficiently populating an iterator of date-related objects into List of + * Partition objects + * + * @author brad + * + * @param <T> generic class type to use with this Partitioner + */ +public class Partitioner<T> { + + private static final TimeZone TZ_UTC = TimeZone.getTimeZone("UTC"); + private static final Logger LOGGER = Logger.getLogger( + Partitioner.class.getName()); + + private ElementPartitionMap<T> map = null; + + /** + * PartitionSize based on Hour intervals + */ + public static PartitionSize hourSize = new HourPartitionSize(); + /** + * PartitionSize based on Day intervals + */ + public static PartitionSize daySize = new DayPartitionSize(); + /** + * PartitionSize based on Week intervals + */ + public static PartitionSize weekSize = new WeekPartitionSize(); + /** + * PartitionSize based on Month intervals + */ + public static PartitionSize monthSize = new MonthPartitionSize(); + /** + * PartitionSize based on Two Month intervals + */ + public static PartitionSize twoMonthSize = new TwoMonthPartitionSize(); + /** + * PartitionSize based on Year intervals + */ + public static PartitionSize yearSize = new YearPartitionSize(); + /** + * PartitionSize based on Two Year intervals + */ + public static PartitionSize twoYearSize = new TwoYearPartitionSize(); + + private static PartitionSize[] sizes = { + hourSize, + daySize, + weekSize, + monthSize, + twoMonthSize, + yearSize, + twoYearSize + }; + + /** + * @param map that converts from the Generic type used in this instance + * to a Date, and adds a Generic type used to a Partition + */ + public Partitioner(ElementPartitionMap<T> map) { + this.map = map; + } + /** + * Get a PartitionSize object by its name + * @param name of the 
PartitionSize + * @return PartitionSize matching the name, or a TwoYearPartitionSize if name + * is unknown + */ + public static PartitionSize getSize(String name) { + for(PartitionSize pa : sizes) { + if(pa.name().equals(name)) { + return pa; + } + } + return twoYearSize; + } + + /** + * Attempt to find the smallest PartitionSize implementation which, spanning + * the range first and last specified, produces at most maxP partitions. + * @param first Date of beginning of time range + * @param last Date of end of time range + * @param maxP maximum number of Partitions to use + * @return a PartitionSize object which will divide the range into at most + * maxP Partitions + */ + public PartitionSize getSize(Date first, Date last, int maxP) { + long diffMS = last.getTime() - first.getTime(); + for(PartitionSize pa : sizes) { + long maxMS = maxP * pa.intervalMS(); + if(maxMS > diffMS) { + return pa; + } + } + return twoYearSize; + } + + private void logDates(String message, Date date1, Date date2) { + SimpleDateFormat f = new SimpleDateFormat("H:mm:ss:SSS MMM d, yyyy"); + f.setTimeZone(TZ_UTC); + String pd1 = f.format(date1); + String pd2 = f.format(date2); + LOGGER.info(message + ":" + pd1 + " - " + pd2); + } + + /** + * Create a List of Partition objects of the specified size, which span the + * date range specified. + * + * @param size of Partitions to create + * @param start Date of beginning of time range to cover + * @param end Date of end of time range to cover + * @return List of Partitions spanning start and end, sized size, in date- + * ascending order. 
+ */ + public List<Partition<T>> getRange(PartitionSize size, Date start, + Date end) { +// logDates("Constructing partitions Size(" + size.name() + ")",start,end); +// Date origStart = new Date(start.getTime()); + List<Partition<T>> partitions = new ArrayList<Partition<T>>(); + Calendar cStart = Calendar.getInstance(TZ_UTC); + cStart.setTime(start); + size.alignStart(cStart); +// logDates("AlignedStart("+size.name()+")",origStart,cStart.getTime()); + Calendar cEnd = size.increment(cStart, 1); +// logDates("AlignedEnd("+size.name()+")",cStart.getTime(),cEnd.getTime()); + while(cStart.getTime().compareTo(end) < 0) { + partitions.add(new Partition<T>(cStart.getTime(), cEnd.getTime())); + cStart = cEnd; + cEnd = size.increment(cStart, 1); +// logDates("Incremented("+size.name()+")", +// cStart.getTime(),cEnd.getTime()); + } + return partitions; + } + + /** + * Add elements from itr into the appropriate partitions. Assumes that + * all elements fit in one of the argument Partitions, that the partitions + * are in ascending order by time, and that elements returned from the + * Iterator are in ascending time order. + * + * @param partitions to populate with objects + * @param itr ascending Iterator of objects to place into the partitions + */ + public void populate(List<Partition<T>> partitions, + Iterator<T> itr) { + int idx = 0; + int size = partitions.size(); + T element = null; + while(idx < size) { + Partition<T> partition = partitions.get(idx); + if(element == null) { + if(itr.hasNext()) { + element = itr.next(); + } else { + // all done + break; + } + } + // will current result fit in the current partition? + while(partition.containsDate(map.elementToDate(element))) { + map.addElementToPartition(element, partition); + element = null; + if(itr.hasNext()) { + element = itr.next(); + } else { + break; + } + } + idx++; + } + if(itr.hasNext()) { + // eew... Likely bad usage. is this an error? 
+ LOGGER.warn("Not all elements fit in partitions!"); + } + } + + /** + * Debugging method + * @param partitions to dump + */ + public void dumpPartitions(List<Partition<T>> partitions) { + int i = 0; + for(Partition<T> partition : partitions) { + i++; + logDates("Partition("+i+")", + partition.getStart(), partition.getEnd()); + } + } + + /* + * + * SOME UNFINISHED/UNTESTED CODE WHICH MAY BE OF INTEREST IN THE FUTURE + * FOLLOWS. NONE IS USED FOR NOW: + * + */ + +// /** +// * Create a List of Partitions centered at center, extending back in time +// * to start, and forward to end. If more than count partitions are required, +// * then the edge partitions will be grown until the range is extended to +// * start and end, with the edge partitions being non-standard size. +// * +// * @param center +// * @param start +// * @param end +// * @param count +// * @return +// */ +// public List<Partition<T>> getCentered(PartitionSize size, Date center, +// Date start, Date end, int count) { +// +// List<Partition<T>> partitions = new ArrayList<Partition<T>>(); +// Calendar cStart = Calendar.getInstance(TimeZone.getTimeZone("GMT")); +// cStart.setTime(center); +// size.alignStart(cStart); +// Calendar cEnd = size.increment(cStart, 1); +// +// partitions.add(new Partition<T>(cStart.getTime(),cEnd.getTime())); +// +// int numSides = (count - 1) / 2; +// // first add those backwards: +// Partition<T> cur = null; +// for(int i=1; i <= numSides; i++) { +// cEnd = cStart; +// cStart = size.increment(cStart, -1); +// Date curStart = cStart.getTime(); +// if(i == numSides) { +// // first partition, maybe make longer: +// if(curStart.after(start)) { +// curStart = new Date(start.getTime() - 1000); +// } +// } +// cur = new Partition<T>(curStart, cEnd.getTime()); +// partitions.add(0,cur); +// } +// +// // re-align center, and increment: +// cStart.setTime(center); +// size.alignStart(cStart); +// cStart = size.increment(cStart, 1); +// cEnd = size.increment(cStart, 1); +// +// for(int 
i=1; i <= numSides; i++) { +// Date curEnd = cEnd.getTime(); +// if(i == numSides) { +// // last partition, maybe make longer: +// if(curEnd.before(end)) { +// // end is exclusive, so make 1 MS more: +// curEnd = end; +// } +// } +// cur = new Partition<T>(cStart.getTime(),curEnd); +// partitions.add(cur); +// cStart = cEnd; +// cEnd = size.increment(cStart, 1); +// } +// return partitions; +// } + +// public List<Partition<T>> partitionRange(Date start, Date end, String name) { + // +// PartitionSize size = getSize(name); +// return getRange(size, start, end); +// } +// public List<Partition<T>> partitionCentered(Date center, Date start, +// Date end, int count, String name) { + // +// PartitionSize size = getSize(name); +// return getCentered(size, center, start, end, count); +// } +// public List<Partition<T>> partitionRange(Date start, Date end, int max) { + // +// PartitionSize size = getSize(start, end, max); +// return getRange(size, start, end); +// } +// public List<Partition<T>> partitionCentered(Date center, Date start, +// Date end, int count) { + // +// PartitionSize size = getSize(start,end,count); +// return getCentered(size, center, start, end, count); +// } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/Partitioner.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/DayPartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/DayPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/DayPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,37 @@ +package 
org.archive.wayback.util.partition.size; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on one Day partitions + * @author brad + * + */ +public class DayPartitionSize implements PartitionSize { + + public String name() { + return PartitionSize.DAY_NAME; + } + + public long intervalMS() { + return MS_IN_DAY; + } + + public void alignStart(Calendar in) { + in.set(Calendar.HOUR_OF_DAY,0); + in.set(Calendar.MINUTE,0); + in.set(Calendar.SECOND,0); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + end.setTime(start.getTime()); + end.add(Calendar.DAY_OF_YEAR,1 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/DayPartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/HourPartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/HourPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/HourPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,36 @@ +package org.archive.wayback.util.partition.size; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on one Hour partitions + * @author brad + * + */ +public class HourPartitionSize implements PartitionSize { + + public String name() { + return PartitionSize.HOUR_NAME; + } + + 
public long intervalMS() { + return MS_IN_HOUR; + } + + public void alignStart(Calendar in) { + in.set(Calendar.MINUTE,0); + in.set(Calendar.SECOND,0); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + end.setTime(start.getTime()); + end.add(Calendar.HOUR_OF_DAY,1 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/HourPartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/MonthPartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/MonthPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/MonthPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,37 @@ +package org.archive.wayback.util.partition.size; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on one Month partitions + * @author brad + * + */ +public class MonthPartitionSize implements PartitionSize { + + public String name() { + return PartitionSize.MONTH_NAME; + } + + public long intervalMS() { + return MS_IN_MONTH; + } + + public void alignStart(Calendar in) { + in.set(Calendar.DAY_OF_MONTH,1); + in.set(Calendar.HOUR_OF_DAY,0); + in.set(Calendar.MINUTE,0); + in.set(Calendar.SECOND,0); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + 
end.setTime(start.getTime()); + end.add(Calendar.MONTH,1 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/MonthPartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoMonthPartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoMonthPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoMonthPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,38 @@ +package org.archive.wayback.util.partition.size; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on two Month partitions + * @author brad + * + */ +public class TwoMonthPartitionSize implements PartitionSize { + + public String name() { + return PartitionSize.TWO_MONTH_NAME; + } + + public long intervalMS() { + return MS_IN_TWO_MONTH; + } + + public void alignStart(Calendar in) { + in.set(Calendar.DAY_OF_MONTH,1); + in.set(Calendar.HOUR_OF_DAY,0); + in.set(Calendar.MINUTE,0); + in.set(Calendar.SECOND,0); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + end.setTime(start.getTime()); + end.add(Calendar.MONTH,2 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoMonthPartitionSize.java ___________________________________________________________________ Added: 
svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoYearPartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoYearPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoYearPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,38 @@ +package org.archive.wayback.util.partition.size; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on two Year partitions + * @author brad + * + */ +public class TwoYearPartitionSize implements PartitionSize { + + public String name() { + return PartitionSize.TWO_YEAR_NAME; + } + + public long intervalMS() { + return MS_IN_TWO_YEAR; + } + + public void alignStart(Calendar in) { + in.set(Calendar.DAY_OF_YEAR,1); + in.set(Calendar.HOUR_OF_DAY,0); + in.set(Calendar.MINUTE,0); + in.set(Calendar.SECOND,0); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + end.setTime(start.getTime()); + end.add(Calendar.YEAR,2 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/TwoYearPartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/WeekPartitionSize.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/WeekPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/WeekPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,36 @@ +package org.archive.wayback.util.partition.size; + +import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on one Week partitions + * @author brad + * + */ +public class WeekPartitionSize implements PartitionSize { + + public String name() { + return PartitionSize.WEEK_NAME; + } + + public long intervalMS() { + return MS_IN_WEEK; + } + + public void alignStart(Calendar in) { + in.set(Calendar.HOUR_OF_DAY,1); + in.set(Calendar.MINUTE,1); + in.set(Calendar.SECOND,1); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + end.setTime(start.getTime()); + end.add(Calendar.DAY_OF_YEAR,7 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/WeekPartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/YearPartitionSize.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/YearPartitionSize.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/YearPartitionSize.java 2010-04-10 02:41:32 UTC (rev 3032) @@ -0,0 +1,37 @@ +package org.archive.wayback.util.partition.size; + 
+import java.util.Calendar; +import java.util.TimeZone; + +import org.archive.wayback.util.partition.PartitionSize; + +/** + * PartitionSize which aligns on one Year partitions + * @author brad + * + */ +public class YearPartitionSize implements PartitionSize { + + public String name() { + return YEAR_NAME; + } + + public long intervalMS() { + return MS_IN_YEAR; + } + + public void alignStart(Calendar in) { + in.set(Calendar.DAY_OF_YEAR,1); + in.set(Calendar.HOUR_OF_DAY,0); + in.set(Calendar.MINUTE,0); + in.set(Calendar.SECOND,0); + in.set(Calendar.MILLISECOND, 0); + } + + public Calendar increment(Calendar start, int offset) { + Calendar end = Calendar.getInstance(TimeZone.getTimeZone("GMT")); + end.setTime(start.getTime()); + end.add(Calendar.YEAR,1 * offset); + return end; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/partition/size/YearPartitionSize.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3031 http://archive-access.svn.sourceforge.net/archive-access/?rev=3031&view=rev Author: bradtofel Date: 2010-04-09 02:02:57 +0000 (Fri, 09 Apr 2010) Log Message: ----------- LOGGING: reduced stacktrace output, upped info info log to a warning TWEAK: removed comments and some whitespace changes Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-04-09 02:01:13 UTC (rev 3030) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilter.java 2010-04-09 02:02:57 UTC (rev 3031) @@ -61,12 +61,12 @@ */ public class RobotExclusionFilter extends ExclusionFilter { - private final static Logger LOGGER = Logger.getLogger(RobotExclusionFilter.class.getName()); - + private final static Logger LOGGER = + Logger.getLogger(RobotExclusionFilter.class.getName()); + private final static String HTTP_PREFIX = "http://"; private final static String ROBOT_SUFFIX = "/robots.txt"; - private static String WWWN_REGEX = "^www[0-9]+\\."; private final static Pattern WWWN_PATTERN = Pattern.compile(WWWN_REGEX); private LiveWebCache webCache = null; @@ -160,11 +160,7 @@ List<String> urlStrings = searchResultToRobotUrlStrings(host); Iterator<String> itr = urlStrings.iterator(); String firstUrlString = null; -// StringBuilder sb = new StringBuilder(); -// for(String ttt : urlStrings) { -// sb.append("RU(").append(ttt).append(")"); -// } -// LOGGER.info("RobotUrls for("+host+")"+sb.toString()); + // loop through them all. 
As soon as we get a response, store that // in the cache for the FIRST url we tried and return it.. // If we get no responses for any of the robot URLs, use "empty" rules, @@ -202,15 +198,13 @@ LOGGER.info("ROBOT: LiveDocumentNotAvailableException("+urlString+")"); } catch (MalformedURLException e) { - e.printStackTrace(); +// e.printStackTrace(); LOGGER.info("ROBOT: MalformedURLException("+urlString+")"); return null; } catch (IOException e) { - e.printStackTrace(System.err); - LOGGER.info("ROBOT: IOException("+urlString+"):"+e.getLocalizedMessage()); + LOGGER.warning("ROBOT: IOException("+urlString+"):"+e.getLocalizedMessage()); return null; } catch (LiveWebCacheUnavailableException e) { - e.printStackTrace(); LOGGER.info("ROBOT: LiveWebCacheUnavailableException("+urlString+")"); return null; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |