From: <bra...@us...> - 2009-12-09 06:47:47
|
Revision: 2936 http://archive-access.svn.sourceforge.net/archive-access/?rev=2936&view=rev Author: bradtofel Date: 2009-12-09 06:47:35 +0000 (Wed, 09 Dec 2009) Log Message: ----------- Hackery to get live web caching Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java 2009-12-09 06:47:35 UTC (rev 2936) @@ -0,0 +1,157 @@ +/* ARCCachingProxy + * + * $Id$: + * + * Created on Dec 8, 2009. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.liveweb; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.net.URL; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.log4j.Logger; +import org.archive.io.arc.ARCLocation; +import org.archive.io.arc.ARCRecord; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.core.Resource; +import org.archive.wayback.exception.LiveDocumentNotAvailableException; +import org.archive.wayback.resourcestore.resourcefile.ArcResource; +import org.archive.wayback.webapp.ServletRequestContext; + +/** + * @author brad + * + */ +public class ARCCachingProxy extends ServletRequestContext { + + private final static String EXPIRES_HEADER = "Expires"; + + private final static String ARC_RECORD_CONTENT_TYPE = "application/x-arc-record"; + private static final Logger LOGGER = Logger.getLogger( + ARCCachingProxy.class.getName()); + private ARCCacheDirectory arcCacheDir = null; + private URLCacher cacher = null; + private long expiresMS = 60 * 60 * 1000; + /* (non-Javadoc) + * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse) + */ + @Override + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws ServletException, + IOException { + + StringBuffer sb = httpRequest.getRequestURL(); + String query = httpRequest.getQueryString(); + if(query != null) { + sb.append("?").append(query); + } + URL url = new URL(sb.toString()); + FileRegion r = null; + try { + r = getLiveResource(url); + httpResponse.setStatus(httpResponse.SC_OK); + httpResponse.setContentLength((int)r.getLength()); + httpResponse.setContentType(ARC_RECORD_CONTENT_TYPE); + httpResponse.setDateHeader("Expires", System.currentTimeMillis() + expiresMS); + r.copyToOutputStream(httpResponse.getOutputStream()); + + } catch (LiveDocumentNotAvailableException e) { + + e.printStackTrace(); + httpResponse.sendError(httpResponse.SC_NOT_FOUND); + } +// httpResponse.setContentType("text/plain"); +// PrintWriter pw = httpResponse.getWriter(); +// pw.println("PathInfo:" + httpRequest.getPathInfo()); +// pw.println("RequestURI:" + httpRequest.getRequestURI()); +// pw.println("RequestURL:" + httpRequest.getRequestURL()); +// pw.println("QueryString:" + httpRequest.getQueryString()); +// pw.println("PathTranslated:" + httpRequest.getPathTranslated()); +// pw.println("ServletPath:" + httpRequest.getServletPath()); +// pw.println("ContextPath:" + httpRequest.getContextPath()); +// if(r != null) { +// pw.println("CachePath:" + r.file.getAbsolutePath()); +// pw.println("CacheStart:" + r.start); +// pw.println("CacheEnd:" + r.end); +// } else { +// pw.println("FAILED CACHE!"); +// } + + return true; + } + + + private FileRegion getLiveResource(URL url) + throws LiveDocumentNotAvailableException, IOException { + + Resource resource = null; + + LOGGER.info("Caching URL(" + url.toString() + ")"); + FileRegion region = cacher.cache2(arcCacheDir, url.toString()); + if(region != null) { + LOGGER.info("Cached URL(" + url.toString() + ") in " + + "ARC(" + region.file.getAbsolutePath() + ") at (" + + region.start + " - " + region.end + ")"); + + } else { + throw new IOException("No location!"); + } + + return region; +} + + /** + * @return the arcCacheDir + */ + public ARCCacheDirectory getArcCacheDir() { + return arcCacheDir; + } + + /** + * @param arcCacheDir the arcCacheDir to set + */ + public void setArcCacheDir(ARCCacheDirectory arcCacheDir) { + this.arcCacheDir = arcCacheDir; + } + + /** + * @return the cacher + */ + public URLCacher getCacher() { + return cacher; + } + + /** + * @param cacher the cacher to set + */ + public void setCacher(URLCacher cacher) { + this.cacher = cacher; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java 2009-12-09 06:47:35 UTC (rev 2936) @@ -0,0 +1,62 @@ +/* FileRegion + * + * $Id$: + * + * Created on Dec 8, 2009. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.liveweb; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.io.RandomAccessFile; + +/** + * @author brad + * + */ +public class FileRegion { + File file = null; + long start = -1; + long end = -1; + public long getLength() { + return end - start; + } + public void copyToOutputStream(OutputStream o) throws IOException { + long left = end - start; + int BUFF_SIZE = 4096; + byte buf[] = new byte[BUFF_SIZE]; + RandomAccessFile raf = new RandomAccessFile(file, "r"); + raf.seek(start); + while(left > 0) { + int amtToRead = (int) Math.min(left, BUFF_SIZE); + int amtRead = raf.read(buf, 0, amtToRead); + if(amtRead < 0) { + throw new IOException("Not enough to read! EOF before expected region end"); + } + o.write(buf,0,amtRead); + left -= amtRead; + } + raf.close(); + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2009-12-01 23:21:59 UTC (rev 2935) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2009-12-09 06:47:35 UTC (rev 2936) @@ -156,21 +156,52 @@ writer.write(url,mime,ip,captureDate.getTime(),len,fis); writer.checkSize(); -// long newSize = writer.getPosition(); -// long oSize = writer.getFile().length(); + long newSize = writer.getPosition(); + long oSize = writer.getFile().length(); + final long arcEndOffset = oSize; LOGGER.info("Wrote " + url + " at " + arcPath + ":" + arcOffset); + LOGGER.info("NewSize:" + newSize + " oSize: " + oSize); fis.close(); return new ARCLocation() { private String filename = arcPath; private long offset = arcOffset; + private long endOffset = arcEndOffset; public String getName() { return this.filename; } - public long getOffset() { return this.offset; } + public long getEndOffset() { return this.endOffset; } + }; } + private FileRegion storeFile2(File file, ARCWriter writer, String url, + ExtendedGetMethod method) throws IOException { + + FileInputStream fis = new FileInputStream(file); + int len = (int) file.length(); + String mime = method.getMime(); + String ip = method.getRemoteIP(); + Date captureDate = method.getCaptureDate(); + + writer.checkSize(); + final long arcOffset = writer.getPosition(); + final String arcPath = writer.getFile().getAbsolutePath(); + writer.write(url,mime,ip,captureDate.getTime(),len,fis); + writer.checkSize(); + long newSize = writer.getPosition(); + long oSize = writer.getFile().length(); + final long arcEndOffset = oSize; + LOGGER.info("Wrote " + url + " at " + arcPath + ":" + arcOffset); + LOGGER.info("NewSize:" + newSize + " oSize: " + oSize); + fis.close(); + FileRegion fr = new FileRegion(); + fr.file = writer.getFile(); + fr.start = arcOffset; + fr.end = oSize; + return fr; + } + /** * Retrieve urlString, and store using ARCWriter, returning * ARCLocation where the document was stored. @@ -219,7 +250,44 @@ } return location; } + public FileRegion cache2(ARCCacheDirectory cache, String urlString) + throws LiveDocumentNotAvailableException, IOException, URIException { + // localize URL + File tmpFile = getTmpFile(); + ExtendedGetMethod method; + try { + method = urlToFile(urlString,tmpFile); + } catch (LiveDocumentNotAvailableException e) { + LOGGER.info("Attempted to get " + urlString + " failed..."); + tmpFile.delete(); + throw e; + } catch (URIException e) { + tmpFile.delete(); + throw e; + } catch (IOException e) { + tmpFile.delete(); + throw e; + } + + // store URL + FileRegion region = null; + ARCWriter writer = null; + try { + writer = cache.getWriter(); + region = storeFile2(tmpFile, writer, urlString, method); + } catch(IOException e) { + e.printStackTrace(); + throw e; + } finally { + if(writer != null) { + cache.returnWriter(writer); + } + tmpFile.delete(); + } + return region; +} + /** * @param args */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |