|
From: <bra...@us...> - 2009-12-09 06:47:47
|
Revision: 2936
http://archive-access.svn.sourceforge.net/archive-access/?rev=2936&view=rev
Author: bradtofel
Date: 2009-12-09 06:47:35 +0000 (Wed, 09 Dec 2009)
Log Message:
-----------
Hackery to get live web caching
Modified Paths:
--------------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java
Added Paths:
-----------
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java 2009-12-09 06:47:35 UTC (rev 2936)
@@ -0,0 +1,157 @@
+/* ARCCachingProxy
+ *
+ * $Id$:
+ *
+ * Created on Dec 8, 2009.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.liveweb;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+import java.net.URL;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.log4j.Logger;
+import org.archive.io.arc.ARCLocation;
+import org.archive.io.arc.ARCRecord;
+import org.archive.wayback.core.CaptureSearchResult;
+import org.archive.wayback.core.Resource;
+import org.archive.wayback.exception.LiveDocumentNotAvailableException;
+import org.archive.wayback.resourcestore.resourcefile.ArcResource;
+import org.archive.wayback.webapp.ServletRequestContext;
+
+/**
+ * Request handler that fetches the requested URL from the live web through
+ * the configured {@link URLCacher}, which stores the document in an ARC file
+ * obtained from the configured {@link ARCCacheDirectory}, and then streams
+ * the stored byte region back to the client with the content type
+ * {@code application/x-arc-record}.
+ *
+ * Both {@code cacher} and {@code arcCacheDir} must be set before requests
+ * are handled; they are dereferenced without a null check.
+ *
+ * @author brad
+ */
+public class ARCCachingProxy extends ServletRequestContext {
+
+    private final static String EXPIRES_HEADER = "Expires";
+    private final static String CONTENT_LENGTH_HEADER = "Content-Length";
+
+    private final static String ARC_RECORD_CONTENT_TYPE = "application/x-arc-record";
+    private static final Logger LOGGER = Logger.getLogger(
+            ARCCachingProxy.class.getName());
+    private ARCCacheDirectory arcCacheDir = null;
+    private URLCacher cacher = null;
+    /** How far in the future the Expires header is set: one hour, in ms. */
+    private long expiresMS = 60 * 60 * 1000;
+
+    /**
+     * Rebuilds the full request URL (including any query string), caches
+     * that URL via {@link #getLiveResource(URL)} and writes the resulting
+     * ARC region to the response. Responds with 404 when the live document
+     * is not available.
+     *
+     * @param httpRequest the incoming request; its URL is the one fetched
+     * @param httpResponse the response the ARC record bytes are written to
+     * @return always {@code true}
+     * @throws ServletException declared by the overridden method
+     * @throws IOException if the URL is malformed, caching yields no
+     *         region, or the region cannot be copied to the response
+     * @see org.archive.wayback.webapp.ServletRequestContext#handleRequest(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
+     */
+    @Override
+    public boolean handleRequest(HttpServletRequest httpRequest,
+            HttpServletResponse httpResponse) throws ServletException,
+            IOException {
+
+        StringBuffer sb = httpRequest.getRequestURL();
+        String query = httpRequest.getQueryString();
+        if (query != null) {
+            sb.append("?").append(query);
+        }
+        URL url = new URL(sb.toString());
+        try {
+            FileRegion r = getLiveResource(url);
+            long length = r.getLength();
+            httpResponse.setStatus(HttpServletResponse.SC_OK);
+            if (length <= Integer.MAX_VALUE) {
+                httpResponse.setContentLength((int) length);
+            } else {
+                // setContentLength() only takes an int; casting a larger
+                // value would advertise a wrong (possibly negative) length.
+                httpResponse.setHeader(CONTENT_LENGTH_HEADER,
+                        Long.toString(length));
+            }
+            httpResponse.setContentType(ARC_RECORD_CONTENT_TYPE);
+            httpResponse.setDateHeader(EXPIRES_HEADER,
+                    System.currentTimeMillis() + expiresMS);
+            r.copyToOutputStream(httpResponse.getOutputStream());
+
+        } catch (LiveDocumentNotAvailableException e) {
+            // Thrown by getLiveResource() before anything has been written
+            // to the response, so sending an error status is still possible.
+            LOGGER.warn("Live document not available for URL(" + url + ")", e);
+            httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND);
+        }
+
+        return true;
+    }
+
+    /**
+     * Caches the given URL through the configured {@link URLCacher} and
+     * returns the file region the cacher reports for it.
+     *
+     * @param url the live URL to fetch and store
+     * @return the region (file, start, end) reported by the cacher
+     * @throws LiveDocumentNotAvailableException propagated from the cacher
+     * @throws IOException propagated from the cacher, or raised here when
+     *         the cacher returns no region
+     */
+    private FileRegion getLiveResource(URL url)
+            throws LiveDocumentNotAvailableException, IOException {
+
+        LOGGER.info("Caching URL(" + url.toString() + ")");
+        FileRegion region = cacher.cache2(arcCacheDir, url.toString());
+        if (region == null) {
+            throw new IOException("No location!");
+        }
+        LOGGER.info("Cached URL(" + url.toString() + ") in " +
+                "ARC(" + region.file.getAbsolutePath() + ") at ("
+                + region.start + " - " + region.end + ")");
+        return region;
+    }
+
+    /**
+     * @return the arcCacheDir
+     */
+    public ARCCacheDirectory getArcCacheDir() {
+        return arcCacheDir;
+    }
+
+    /**
+     * @param arcCacheDir the arcCacheDir to set
+     */
+    public void setArcCacheDir(ARCCacheDirectory arcCacheDir) {
+        this.arcCacheDir = arcCacheDir;
+    }
+
+    /**
+     * @return the cacher
+     */
+    public URLCacher getCacher() {
+        return cacher;
+    }
+
+    /**
+     * @param cacher the cacher to set
+     */
+    public void setCacher(URLCacher cacher) {
+        this.cacher = cacher;
+    }
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/ARCCachingProxy.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java 2009-12-09 06:47:35 UTC (rev 2936)
@@ -0,0 +1,62 @@
+/* FileRegion
+ *
+ * $Id$:
+ *
+ * Created on Dec 8, 2009.
+ *
+ * Copyright (C) 2006 Internet Archive.
+ *
+ * This file is part of Wayback.
+ *
+ * Wayback is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * any later version.
+ *
+ * Wayback is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser Public License
+ * along with Wayback; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package org.archive.wayback.liveweb;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+
+/**
+ * A contiguous byte range {@code [start, end)} within a file, with a helper
+ * to copy exactly that range to an output stream.
+ *
+ * The fields are package-visible and default to {@code null} / {@code -1};
+ * callers are expected to assign them before using the methods.
+ *
+ * @author brad
+ */
+public class FileRegion {
+    File file = null;
+    long start = -1;
+    long end = -1;
+
+    /**
+     * @return the number of bytes in the region, {@code end - start}
+     */
+    public long getLength() {
+        return end - start;
+    }
+
+    /**
+     * Copies the bytes of this region from {@code file} to the given stream.
+     * The stream is neither flushed nor closed here.
+     *
+     * @param o destination for the region's bytes
+     * @throws IOException if the file cannot be opened, sought or read, if
+     *         writing to {@code o} fails, or if the file ends before
+     *         {@code end} is reached
+     */
+    public void copyToOutputStream(OutputStream o) throws IOException {
+        long left = end - start;
+        final int BUFF_SIZE = 4096;
+        byte[] buf = new byte[BUFF_SIZE];
+        RandomAccessFile raf = new RandomAccessFile(file, "r");
+        try {
+            raf.seek(start);
+            while (left > 0) {
+                int amtToRead = (int) Math.min(left, BUFF_SIZE);
+                int amtRead = raf.read(buf, 0, amtToRead);
+                if (amtRead < 0) {
+                    throw new IOException("Not enough to read! EOF before expected region end");
+                }
+                o.write(buf, 0, amtRead);
+                left -= amtRead;
+            }
+        } finally {
+            // Release the file handle on every path, including failed
+            // seeks/reads/writes and the premature-EOF error above.
+            raf.close();
+        }
+    }
+
+}
Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/FileRegion.java
___________________________________________________________________
Added: svn:keywords
+ Author Date Revision Id
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2009-12-01 23:21:59 UTC (rev 2935)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLCacher.java 2009-12-09 06:47:35 UTC (rev 2936)
@@ -156,21 +156,52 @@
writer.write(url,mime,ip,captureDate.getTime(),len,fis);
writer.checkSize();
-// long newSize = writer.getPosition();
-// long oSize = writer.getFile().length();
+ long newSize = writer.getPosition();
+ long oSize = writer.getFile().length();
+ final long arcEndOffset = oSize;
LOGGER.info("Wrote " + url + " at " + arcPath + ":" + arcOffset);
+ LOGGER.info("NewSize:" + newSize + " oSize: " + oSize);
fis.close();
return new ARCLocation() {
private String filename = arcPath;
private long offset = arcOffset;
+ private long endOffset = arcEndOffset;
public String getName() { return this.filename; }
-
public long getOffset() { return this.offset; }
+ public long getEndOffset() { return this.endOffset; }
+
};
}
+    /**
+     * Appends the contents of {@code file} as one record to the ARC file
+     * behind {@code writer} and reports where the record landed.
+     *
+     * @param file local file holding the document bytes to store
+     * @param writer ARC writer the record is written with
+     * @param url URL recorded for the document
+     * @param method supplies the mime type, remote IP and capture date
+     *        recorded for the document
+     * @return region whose start is the writer position before the write
+     *         and whose end is the writer file's length after the write
+     * @throws IOException if the file cannot be opened or the write fails
+     */
+    private FileRegion storeFile2(File file, ARCWriter writer, String url,
+            ExtendedGetMethod method) throws IOException {
+
+        String mime = method.getMime();
+        String ip = method.getRemoteIP();
+        Date captureDate = method.getCaptureDate();
+        // NOTE(review): the length is narrowed to int, so a file larger
+        // than Integer.MAX_VALUE bytes would be recorded with a wrong
+        // length - confirm whether the writer accepts a long.
+        int len = (int) file.length();
+
+        FileInputStream fis = new FileInputStream(file);
+        try {
+            writer.checkSize();
+            final long arcOffset = writer.getPosition();
+            final String arcPath = writer.getFile().getAbsolutePath();
+            writer.write(url, mime, ip, captureDate.getTime(), len, fis);
+            // NOTE(review): if checkSize() can switch the writer to a new
+            // file, getFile() below may no longer be the file arcOffset
+            // refers to - confirm against the ARCWriter implementation.
+            writer.checkSize();
+            long newSize = writer.getPosition();
+            long oSize = writer.getFile().length();
+            LOGGER.info("Wrote " + url + " at " + arcPath + ":" + arcOffset);
+            LOGGER.info("NewSize:" + newSize + " oSize: " + oSize);
+
+            FileRegion fr = new FileRegion();
+            fr.file = writer.getFile();
+            fr.start = arcOffset;
+            fr.end = oSize;
+            return fr;
+        } finally {
+            // Close the input on every path so a failed write does not
+            // leak the file handle.
+            fis.close();
+        }
+    }
+
/**
* Retrieve urlString, and store using ARCWriter, returning
* ARCLocation where the document was stored.
@@ -219,7 +250,44 @@
}
return location;
}
+    /**
+     * Downloads {@code urlString} into a temporary file, stores that file
+     * through a writer borrowed from {@code cache}, and returns the region
+     * reported by {@code storeFile2}. The temporary file is deleted on
+     * every handled path, and a borrowed writer is always handed back.
+     *
+     * @param cache source of the ARC writer; the writer is returned to it
+     * @param urlString URL to fetch and store
+     * @return the region produced by {@code storeFile2}
+     * @throws LiveDocumentNotAvailableException rethrown from the download
+     * @throws IOException rethrown from the download or the store step
+     * @throws URIException rethrown from the download
+     */
+    public FileRegion cache2(ARCCacheDirectory cache, String urlString)
+            throws LiveDocumentNotAvailableException, IOException, URIException {
+
+        // Step 1: pull the live document down to local scratch space.
+        File scratch = getTmpFile();
+        ExtendedGetMethod fetched;
+        try {
+            fetched = urlToFile(urlString, scratch);
+        } catch (LiveDocumentNotAvailableException unavailable) {
+            LOGGER.info("Attempted to get " + urlString + " failed...");
+            scratch.delete();
+            throw unavailable;
+        } catch (URIException badUri) {
+            scratch.delete();
+            throw badUri;
+        } catch (IOException ioFailure) {
+            scratch.delete();
+            throw ioFailure;
+        }
+
+        // Step 2: append the downloaded bytes to an ARC file.
+        ARCWriter arcWriter = null;
+        try {
+            arcWriter = cache.getWriter();
+            return storeFile2(scratch, arcWriter, urlString, fetched);
+        } catch (IOException storeFailure) {
+            storeFailure.printStackTrace();
+            throw storeFailure;
+        } finally {
+            if (arcWriter != null) {
+                cache.returnWriter(arcWriter);
+            }
+            scratch.delete();
+        }
+    }
+
/**
* @param args
*/
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|