You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
From: <bra...@us...> - 2008-06-24 22:55:27
|
Revision: 2305 http://archive-access.svn.sourceforge.net/archive-access/?rev=2305&view=rev Author: bradtofel Date: 2008-06-24 15:55:35 -0700 (Tue, 24 Jun 2008) Log Message: ----------- INITIAL REV: ResourceFile abstraction, including ResourceFileSource interface, which will allow recursive local directories, polling of local and remote HTTP exported directories Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcWarcFilenameFilter.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileLocation.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcWarcFilenameFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcWarcFilenameFilter.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcWarcFilenameFilter.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,50 @@ +/* ArcWarcFilenameFilter + * + * $Id$ + * + * Created on 4:15:56 PM May 29, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.File; +import java.io.FilenameFilter; + +/** + * FilenameFilter which returns only compressed/uncompressed ARC/WARC files. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class ArcWarcFilenameFilter implements FilenameFilter { + private final static String ARC_SUFFIX = ".arc"; + private final static String ARC_GZ_SUFFIX = ".arc.gz"; + private final static String WARC_SUFFIX = ".warc"; + private final static String WARC_GZ_SUFFIX = ".warc.gz"; + + public boolean accept(File dir, String name) { + return name.endsWith(ARC_SUFFIX) || + name.endsWith(ARC_GZ_SUFFIX) || + name.endsWith(WARC_SUFFIX) || + name.endsWith(WARC_GZ_SUFFIX); + } + +} + Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/DirectoryResourceFileSource.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,144 @@ +/* DirectoryResourceFileSource + * + * $Id$ + * + * Created on 4:00:49 PM May 29, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Local directory tree holding ARC and WARC files. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class DirectoryResourceFileSource implements ResourceFileSource { + + private static char SEPRTR = '_'; + private String name = null; + private String path = null; + private File root = null; + private FilenameFilter filter = new ArcWarcFilenameFilter(); + private boolean recurse = true; + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getFileList() + */ + public ResourceFileList getResourceFileList() throws IOException { + if(root == null) { + throw new IOException("No prefix set"); + } + ResourceFileList list = new ResourceFileList(); + populateFileList(list,root,recurse); + return list; + } + + /** + * add all files matching this.filter beneath root to list, recursing if + * recurse is set. 
+ * + * @param list + * @param root + * @param recurse + * @throws IOException + */ + private void populateFileList(ResourceFileList list, File root, boolean recurse) + throws IOException { + + File[] files = root.listFiles(); + for(File file : files) { + if(file.isFile() && filter.accept(root, file.getName())) { + ResourceFileLocation location = new ResourceFileLocation( + file.getName(),file.getAbsolutePath()); + list.add(location); + } else if(recurse && file.isDirectory()){ + populateFileList(list, file, recurse); + } + } + } + + public String getBasename(String path) { + int sepIdx = path.lastIndexOf(File.separatorChar); + if(sepIdx != -1) { + return path.substring(sepIdx + 1); + } + return path; + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getName() + */ + public String getName() { + if(name != null) { + return name; + } + if(root != null) { + return root.getAbsolutePath().replace(File.separatorChar, SEPRTR); + } + return null; + } + + public void setName(String name) { + this.name = name; + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getPrefix() + */ + public String getPrefix() { + return path; + } + public void setPrefix(String path) { + this.path = path; + root = new File(path); + } + + public boolean isRecurse() { + return recurse; + } + + public void setRecurse(boolean recurse) { + this.recurse = recurse; + } + + public FilenameFilter getFilter() { + return filter; + } + + public void setFilter(FilenameFilter filter) { + this.filter = filter; + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getSources() + */ + public List<ResourceFileSource> getSources() { + List<ResourceFileSource> sources = new ArrayList<ResourceFileSource>(); + sources.add(this); + return sources; + } +} Added: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/JspUrlResourceFileSource.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,116 @@ +/* JspUrlResourceFileSource + * + * $Id$ + * + * Created on 5:05:53 PM Jun 5, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class JspUrlResourceFileSource implements ResourceFileSource { + + private final static char WEB_SEPARATOR_CHAR = '/'; + private final static String LINE_SEPARATOR_STRING = "\n"; + private String name = null; + private String prefix = null; + private String jsp = null; + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getBasename(java.lang.String) + */ + public String getBasename(String path) { + int sepIdx = path.lastIndexOf(WEB_SEPARATOR_CHAR); + if(sepIdx != -1) { + return path.substring(sepIdx + 1); + } + return path; + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getFileList() + */ + public ResourceFileList getResourceFileList() throws IOException { + + String url = "http://localhost:8080" + jsp + "?url=" + prefix; + URL u = new URL(url); + InputStream is = u.openStream(); + InputStreamReader isr = new InputStreamReader(is); + StringBuilder sb = new StringBuilder(2000); + int READ_SIZE = 2048; + char cbuf[] = new char[READ_SIZE]; + int amt = 0; + while((amt = isr.read(cbuf, 0, READ_SIZE)) != -1) { + sb.append(new String(cbuf,0,amt)); + } + ResourceFileList list = new ResourceFileList(); + String lines[] = sb.toString().split(LINE_SEPARATOR_STRING); + for(String line : lines) { + ResourceFileLocation location = + ResourceFileLocation.deserializeLine(line); + if(location != null) { + list.add(location); + } else { + throw new IOException("Bad line format(" + line +")"); + } + } + return list; + } + + /* 
(non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getName() + */ + public String getName() { + return name; + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.resourcefile.ResourceFileSource#getPrefix() + */ + public String getPrefix() { + return prefix; + } + + public void setName(String name) { + this.name = name; + } + + public void setPrefix(String prefix) { + this.prefix = prefix; + } + + public String getJsp() { + return jsp; + } + + public void setJsp(String jsp) { + this.jsp = jsp; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileList.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,119 @@ +/* ResourceFileList + * + * $Id$ + * + * Created on 12:15:53 PM Jun 16, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.archive.wayback.util.AdaptedIterator; +import org.archive.wayback.util.Adapter; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.flatfile.FlatFile; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ResourceFileList { + private static final Logger LOGGER = + Logger.getLogger(ResourceFileList.class.getName()); + + private HashMap<String,ResourceFileLocation> files = + new HashMap<String,ResourceFileLocation>(); + public void add(ResourceFileLocation location) { + files.put(location.serializeLine(), location); + } + public void addAll(Iterator<ResourceFileLocation> itr) { + while(itr.hasNext()) { + add(itr.next()); + } + } + + public Iterator<ResourceFileLocation> iterator() { + return files.values().iterator(); + } + + public void store(File target) throws IOException { + FlatFile ff = new FlatFile(target.getAbsolutePath()); + Iterator<String> adapted = + new AdaptedIterator<ResourceFileLocation,String>(iterator(), + new ResourceFileLocationAdapter()); + ff.store(adapted); + } + + public static ResourceFileList load(File source) throws IOException { + ResourceFileList list = new ResourceFileList(); + + FlatFile ff = new FlatFile(source.getAbsolutePath()); + CloseableIterator<String> itr = ff.getSequentialIterator(); + while(itr.hasNext()) { + String line = itr.next(); + ResourceFileLocation location = + ResourceFileLocation.deserializeLine(line); + if(location != null) { + list.add(location); + } else { + LOGGER.warning("Bad parse of line(" + line + ") in (" + + 
source.getAbsolutePath() + ")"); + } + } + itr.close(); + return list; + } + + public ResourceFileList subtract(ResourceFileList that) { + HashMap<String,ResourceFileLocation> tmp = + new HashMap<String,ResourceFileLocation>(); + Iterator<ResourceFileLocation> thisItr = iterator(); + while(thisItr.hasNext()) { + ResourceFileLocation location = thisItr.next(); + tmp.put(location.serializeLine(), location); + } + + Iterator<ResourceFileLocation> thatItr = that.iterator(); + while(thatItr.hasNext()) { + ResourceFileLocation location = thatItr.next(); + tmp.remove(location.serializeLine()); + } + ResourceFileList sub = new ResourceFileList(); + sub.addAll(tmp.values().iterator()); + return sub; + } + + private class ResourceFileLocationAdapter implements Adapter<ResourceFileLocation,String> { + + /* (non-Javadoc) + * @see org.archive.wayback.util.Adapter#adapt(java.lang.Object) + */ + public String adapt(ResourceFileLocation o) { + return o.serializeLine(); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileLocation.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileLocation.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileLocation.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,80 @@ +/* ResourceFileLocation + * + * $Id$ + * + * Created on 12:16:04 PM Jun 16, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +/** + * Class encapsulating the name and String location(url/path) of a ResourceFile. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ResourceFileLocation { + private final static char DELIMETER = '\t'; + private String name = null; + private String url = null; + public ResourceFileLocation(String name, String url) { + this.name = name; + this.url = url; + } + public String serializeLine() { + StringBuilder sb = new StringBuilder(100); + sb.append(name); + sb.append(DELIMETER); + sb.append(url); + return sb.toString(); + } + public static ResourceFileLocation deserializeLine(String line) { + int idx = line.indexOf(DELIMETER); + if(idx > -1) { + return new ResourceFileLocation(line.substring(0,idx), + line.substring(idx+1)); + } + return null; + } + /** + * @return the name + */ + public String getName() { + return name; + } + /** + * @param name the name to set + */ + public void setName(String name) { + this.name = name; + } + /** + * @return the url + */ + public String getUrl() { + return url; + } + /** + * @param url the url to set + */ + public void setUrl(String url) { + this.url = url; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSource.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSource.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSource.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,41 @@ +/* ResourceFileSource + * + * $Id$ + * + * Created on 3:49:17 PM May 29, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.IOException; + +/** + * Interface representing the abstract remote or local folder holding ARC/WARC + * files. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public interface ResourceFileSource { + public String getName(); + public String getPrefix(); + public String getBasename(String path); + public ResourceFileList getResourceFileList() throws IOException; +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFileSourceUpdater.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,162 @@ +/* ResourceFileSourceUpdater + * + * $Id$ + * + * Created on 12:30:38 PM Jun 23, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.logging.Logger; + +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBUpdater; +import org.archive.wayback.util.DirMaker; + +/** + * Class which repeatedly builds a ResourceFileList for a set of + * ResourceFileSource objects, serializing them into files, and dropping them + * into the incoming directory of a ResourceFileLocationDBUpdater. + * + * In the current implementation, this uses only a single thread to scan the + * ResourceFileSource objects, but with larger installations (1000's of + * ResourceFileSources), multiple threads may later be required. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ResourceFileSourceUpdater { + private static final Logger LOGGER = + Logger.getLogger(ResourceFileSourceUpdater.class.getName()); + private List<ResourceFileSource> sources = null; + + private File target = null; + + + private UpdateThread thread = null; + private long interval = 120000; + + public void init() { + if(interval > 0) { + thread = new UpdateThread(this,interval); + thread.start(); + } + } + + public void shutdown() { + if(thread != null) { + thread.interrupt(); + try { + thread.join(1000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + + private void synchronizeSource(ResourceFileSource source) { + String name = source.getName(); + try { + LOGGER.fine("Synchronizing " + name); + ResourceFileList list = source.getResourceFileList(); + String tmp = name + ResourceFileLocationDBUpdater.TMP_SUFFIX; + File tmpListTarget = new File(target,tmp); + File listTarget = new File(target,name); + list.store(tmpListTarget); + 
tmpListTarget.renameTo(listTarget); + LOGGER.fine("Synchronized " + name); + } catch (IOException e) { + e.printStackTrace(); + LOGGER.warning("FAILED Synchronize " + name + e.getMessage()); + } + } + + public void synchronizeSources() { + for(ResourceFileSource source : sources) { + synchronizeSource(source); + } + } + + private class UpdateThread extends Thread { + private long runInterval = 120000; + private ResourceFileSourceUpdater updater = null; + + public UpdateThread(ResourceFileSourceUpdater updater, + long runInterval) { + + this.updater = updater; + this.runInterval = runInterval; + } + + public void run() { + LOGGER.info("alive"); + while (true) { + try { + long startSync = System.currentTimeMillis(); + updater.synchronizeSources(); + long endSync = System.currentTimeMillis(); + long syncDuration = endSync - startSync; + long sleepInterval = runInterval - syncDuration; + if(sleepInterval > 0) { + sleep(sleepInterval); + } else { + LOGGER.warning("Last Synchronize took " + syncDuration + + " where interval is " + interval + + ". 
Not sleeping."); + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + } + + public List<ResourceFileSource> getSources() { + return sources; + } + + public void setSources(List<ResourceFileSource> sources) { + this.sources = sources; + } + + public String getTarget() { + return DirMaker.getAbsolutePath(target); + } + + public void setTarget(String target) throws IOException { + this.target = DirMaker.ensureDir(target); + } + + /** + * @return the interval + */ + public long getInterval() { + return interval; + } + + /** + * @param interval the interval to set + */ + public void setInterval(long interval) { + this.interval = interval; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/UrlLinkExtractor.java 2008-06-24 22:55:35 UTC (rev 2305) @@ -0,0 +1,105 @@ +/* UrlLinkExtractor + * + * $Id$ + * + * Created on 4:26:53 PM Jun 5, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.resourcefile; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class UrlLinkExtractor { + private final static String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")"; + + private final static String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")"; + + private final static String APOSED_ATTR_VALUE = "(?:'[^'>]*')"; + + private final static String RAW_ATTR_VALUE = "(?:[^ \\t\\n\\x0B\\f\\r>\"']+)"; + + + private final static String ANY_ATTR_VALUE = QUOTED_ATTR_VALUE + "|" + + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" + + RAW_ATTR_VALUE; + + private final static String tagName = "a"; + private final static String attrName = "href"; + + private final static String tagPatString = "<\\s*" + tagName + + "\\s+[^>]*\\b" + attrName + + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; + + private final static Pattern pc = Pattern.compile(tagPatString, + Pattern.CASE_INSENSITIVE); + + public static List<String> extractLinks(final String url) throws IOException { + URL u = new URL(url); + InputStream is = u.openStream(); + InputStreamReader isr = new InputStreamReader(is); + StringBuilder sb = new StringBuilder(2000); + int READ_SIZE = 2048; + char cbuf[] = new char[READ_SIZE]; + int amt = 0; + while((amt = isr.read(cbuf, 0, READ_SIZE)) != -1) { + sb.append(new String(cbuf,0,amt)); + } + return extractAnchors(sb); + } + + private static List<String> extractAnchors(final StringBuilder sb) { + + Matcher m = pc.matcher(sb); + + ArrayList<String> anchors = new 
ArrayList<String>(); + int idx = 0; + while(m.find(idx)) { + anchors.add(trimAttr(m.group(1))); + idx = m.end(1); + } + return anchors; + } + + private static String trimAttr(final String attr) { + int attrLength = attr.length(); + if (attr.charAt(0) == '"') { + return attr.substring(1, attrLength - 1); + } else if (attr.charAt(0) == '\'') { + return attr.substring(1, attrLength - 1); + } else if (attr.charAt(0) == '\\') { + return attr.substring(2, attrLength - 2); + } + return attr; + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-24 22:52:30
|
Revision: 2304 http://archive-access.svn.sourceforge.net/archive-access/?rev=2304&view=rev Author: bradtofel Date: 2008-06-24 15:52:35 -0700 (Tue, 24 Jun 2008) Log Message: ----------- REFACTOR: moved renderException from QueryRenderer and ReplayDispatcher to ExceptionRenderer. REFACTOR: changed interface of ReplayDispatcher to return a ReplayRenderer. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/QueryRenderer.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -32,7 +32,6 @@ import org.archive.wayback.core.SearchResults; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.WaybackException; /** * @@ -42,20 +41,6 @@ */ public interface QueryRenderer { - /** - * Render the contents of a WaybackException in either xml or html format. 
- * - * @param httpRequest - * @param httpResponse - * @param wbRequest - * @param exception - * @throws ServletException - * @throws IOException - */ - public void renderException(HttpServletRequest httpRequest, - HttpServletResponse httpResponse, WaybackRequest wbRequest, - WaybackException exception) throws ServletException, IOException; - /** Show the SearchResults of the request for this particular URL * * @param httpRequest the HttpServletRequest Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ReplayDispatcher.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -24,14 +24,9 @@ */ package org.archive.wayback; -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - +import org.archive.wayback.core.Resource; +import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.WaybackException; /** * @@ -39,21 +34,16 @@ * @author brad * @version $Date$, $Revision$ */ -public interface ReplayDispatcher extends ReplayRenderer { +public interface ReplayDispatcher { /** - * Render the contents of a WaybackException in either html, javascript, or - * css format, depending on the guessed context, so errors in embedded - * documents do not cause unneeded errors in the embedding document. * - * @param httpRequest - * @param httpResponse + * Return a ReplayRenderer appropriate for the Resource. 
+ * * @param wbRequest - * @param exception - * @throws ServletException - * @throws IOException + * @param result + * @param resource + * @return the correct ReplayRenderer for the Resource */ - public void renderException(HttpServletRequest httpRequest, - HttpServletResponse httpResponse, WaybackRequest wbRequest, - WaybackException exception) throws ServletException, IOException; - + public ReplayRenderer getRenderer(WaybackRequest wbRequest, + SearchResult result, Resource resource); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -26,12 +26,12 @@ import java.util.List; +import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.replay.BaseReplayDispatcher; import org.archive.wayback.replay.DateRedirectReplayRenderer; /** @@ -40,8 +40,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class ArchivalUrlReplayDispatcher - extends BaseReplayDispatcher { +public class ArchivalUrlReplayDispatcher implements ReplayDispatcher { /** * MIME type of documents which should be marked up with javascript to @@ -69,9 +68,8 @@ new ArchivalUrlASXReplayRenderer(); /* (non-Javadoc) - * @see org.archive.wayback.replay.ReplayRendererDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, 
org.archive.wayback.core.Resource) + * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ - @Override public ReplayRenderer getRenderer(WaybackRequest wbRequest, SearchResult result, Resource resource) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -24,12 +24,12 @@ */ package org.archive.wayback.domainprefix; +import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.replay.BaseReplayDispatcher; import org.archive.wayback.replay.DateRedirectReplayRenderer; import org.archive.wayback.replay.TransparentReplayRenderer; @@ -39,7 +39,7 @@ * @author brad * @version $Date$, $Revision$ */ -public class DomainPrefixReplayDispatcher extends BaseReplayDispatcher { +public class DomainPrefixReplayDispatcher implements ReplayDispatcher { private final static String TEXT_HTML_MIME = "text/html"; private final static String TEXT_XHTML_MIME = "application/xhtml"; @@ -53,9 +53,8 @@ private DomainPrefixReplayRenderer html = new DomainPrefixReplayRenderer(); /* (non-Javadoc) - * @see org.archive.wayback.replay.BaseReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, 
org.archive.wayback.core.Resource) + * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ - @Override public ReplayRenderer getRenderer(WaybackRequest wbRequest, SearchResult result, Resource resource) { // if the result is not for the exact date requested, redirect to the Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayDispatcher.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -24,27 +24,27 @@ */ package org.archive.wayback.proxy; +import org.archive.wayback.ReplayDispatcher; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.replay.BaseReplayDispatcher; import org.archive.wayback.replay.TransparentReplayRenderer; /** + * Trivial Replay dispatcher implementation, generally used in Proxy mode, that + * sends all documents back as-is. 
* - * * @author brad * @version $Date$, $Revision$ */ -public class ProxyReplayDispatcher extends BaseReplayDispatcher { +public class ProxyReplayDispatcher implements ReplayDispatcher { private ReplayRenderer renderer = new TransparentReplayRenderer(); - + /* (non-Javadoc) - * @see org.archive.wayback.replay.ReplayRendererDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) + * @see org.archive.wayback.ReplayDispatcher#getRenderer(org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource) */ - @Override public ReplayRenderer getRenderer(WaybackRequest wbRequest, SearchResult result, Resource resource) { // always use the transparent: Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -35,20 +35,18 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.core.SearchResults; -import org.archive.wayback.core.UIResults; import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.WaybackException; /** + * Brain-dead simple QueryRenderer implementation, which shunts all the work off + * to a .jsp file as defined by administrators. Also has basic logic to switch + * to a different .jsp to format request asking for XML data. 
* - * * @author brad * @version $Date$, $Revision$ */ public class Renderer implements QueryRenderer { - private String errorJsp = "/jsp/HTMLError.jsp"; - private String xmlErrorJsp = "/jsp/XMLError.jsp"; private String captureJsp = "/jsp/HTMLResults.jsp"; private String urlJsp = "/jsp/HTMLResults.jsp"; private String xmlJsp = "/jsp/XMLResults.jsp"; @@ -68,21 +66,6 @@ dispatcher.forward(request, response); } - public void renderException(HttpServletRequest httpRequest, - HttpServletResponse httpResponse, WaybackRequest wbRequest, - WaybackException exception) throws ServletException, IOException { - - httpRequest.setAttribute("exception", exception); - UIResults uiResults = new UIResults(wbRequest); - String jsp = errorJsp; - if(wbRequest.containsKey(WaybackConstants.REQUEST_XML_DATA)) { - jsp = xmlErrorJsp; - } - uiResults.storeInRequest(httpRequest,jsp); - - proxyRequest(httpRequest,httpResponse,jsp); - } - public void renderUrlResults(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, SearchResults results, ResultURIConverter uriConverter) @@ -121,20 +104,6 @@ } /** - * @return the errorJsp - */ - public String getErrorJsp() { - return errorJsp; - } - - /** - * @param errorJsp the errorJsp to set - */ - public void setErrorJsp(String errorJsp) { - this.errorJsp = errorJsp; - } - - /** * @return the captureJsp */ public String getCaptureJsp() { @@ -161,4 +130,18 @@ public void setUrlJsp(String urlJsp) { this.urlJsp = urlJsp; } + + /** + * @return the xmlJsp + */ + public String getXmlJsp() { + return xmlJsp; + } + + /** + * @param xmlJsp the xmlJsp to set + */ + public void setXmlJsp(String xmlJsp) { + this.xmlJsp = xmlJsp; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2008-06-24 22:50:52 UTC (rev 2303) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2008-06-24 22:52:35 UTC (rev 2304) @@ -33,8 +33,10 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.archive.wayback.ExceptionRenderer; import org.archive.wayback.QueryRenderer; import org.archive.wayback.ReplayDispatcher; +import org.archive.wayback.ReplayRenderer; import org.archive.wayback.RequestParser; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; @@ -46,7 +48,7 @@ import org.archive.wayback.core.UIResults; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.AuthenticationControlException; -import org.archive.wayback.exception.BadQueryException; +import org.archive.wayback.exception.BaseExceptionRenderer; import org.archive.wayback.exception.ResourceNotAvailableException; import org.archive.wayback.exception.ResourceNotInArchiveException; import org.archive.wayback.exception.WaybackException; @@ -76,6 +78,7 @@ private String contextName = null; private WaybackCollection collection = null; private ReplayDispatcher replay = null; + private ExceptionRenderer exception = new BaseExceptionRenderer(); private QueryRenderer query = null; private RequestParser parser = null; private ResultURIConverter uriConverter = null; @@ -287,10 +290,9 @@ handled = dispatchLocal(httpRequest,httpResponse); } - } catch (BadQueryException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); - } catch (AuthenticationControlException e) { - query.renderException(httpRequest, httpResponse, wbRequest, e); + } catch(WaybackException e) { + logNotInArchive(e,wbRequest); + exception.renderException(httpRequest, httpResponse, wbRequest, e); } return handled; @@ -298,7 +300,7 @@ private 
void handleReplay(WaybackRequest wbRequest, HttpServletRequest httpRequest, HttpServletResponse httpResponse) - throws IOException, ServletException { + throws IOException, ServletException, WaybackException { Resource resource = null; try { SearchResults results = collection.getResourceIndex().query(wbRequest); @@ -310,12 +312,9 @@ // TODO: check which versions are actually accessible right now? SearchResult closest = captureResults.getClosest(wbRequest); resource = collection.getResourceStore().retrieveResource(closest); - - replay.renderResource(httpRequest, httpResponse, wbRequest, + ReplayRenderer renderer = replay.getRenderer(wbRequest, closest, resource); + renderer.renderResource(httpRequest, httpResponse, wbRequest, closest, resource, uriConverter, captureResults); - } catch(WaybackException e) { - logNotInArchive(e,wbRequest); - replay.renderException(httpRequest, httpResponse, wbRequest, e); } finally { if(resource != null) { resource.close(); @@ -325,26 +324,21 @@ private void handleQuery(WaybackRequest wbRequest, HttpServletRequest httpRequest, HttpServletResponse httpResponse) - throws ServletException, IOException { + throws ServletException, IOException, WaybackException { - try { - SearchResults results = collection.getResourceIndex().query(wbRequest); - if(results.getResultsType().equals( - WaybackConstants.RESULTS_TYPE_CAPTURE)) { - CaptureSearchResults cResults = (CaptureSearchResults) results; - SearchResult closest = cResults.getClosest(wbRequest); - closest.put(WaybackConstants.RESULT_CLOSEST_INDICATOR, - WaybackConstants.RESULT_CLOSEST_VALUE); - query.renderUrlResults(httpRequest,httpResponse,wbRequest, - results,uriConverter); + SearchResults results = collection.getResourceIndex().query(wbRequest); + if(results.getResultsType().equals( + WaybackConstants.RESULTS_TYPE_CAPTURE)) { + CaptureSearchResults cResults = (CaptureSearchResults) results; + SearchResult closest = cResults.getClosest(wbRequest); + 
closest.put(WaybackConstants.RESULT_CLOSEST_INDICATOR, + WaybackConstants.RESULT_CLOSEST_VALUE); + query.renderUrlResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); - } else { - query.renderUrlPrefixResults(httpRequest,httpResponse,wbRequest, - results,uriConverter); - } - } catch(WaybackException e) { - logNotInArchive(e,wbRequest); - query.renderException(httpRequest, httpResponse, wbRequest, e); + } else { + query.renderUrlPrefixResults(httpRequest,httpResponse,wbRequest, + results,uriConverter); } } @@ -355,6 +349,7 @@ } private void logNotInArchive(WaybackException e, WaybackRequest r) { + // TODO: move this into ResourceNotInArchiveException constructor if(e instanceof ResourceNotInArchiveException) { String url = r.get(WaybackConstants.REQUEST_URL); StringBuilder sb = new StringBuilder(100); @@ -468,4 +463,12 @@ public void setCollection(WaybackCollection collection) { this.collection = collection; } + + public ExceptionRenderer getException() { + return exception; + } + + public void setException(ExceptionRenderer exception) { + this.exception = exception; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2303 http://archive-access.svn.sourceforge.net/archive-access/?rev=2303&view=rev Author: bradtofel Date: 2008-06-24 15:50:52 -0700 (Tue, 24 Jun 2008) Log Message: ----------- COMMENT: fixed class name of javadoc reference. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-06-24 21:42:44 UTC (rev 2302) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayRenderer.java 2008-06-24 22:50:52 UTC (rev 2303) @@ -126,7 +126,7 @@ } /* (non-Javadoc) - * @see org.archive.wayback.replay.HeaderFilter#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) + * @see org.archive.wayback.replay.HttpHeaderProcessor#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResult) */ public void filter(Map<String, String> output, String key, String value, ResultURIConverter uriConverter, SearchResult result) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2300 http://archive-access.svn.sourceforge.net/archive-access/?rev=2300&view=rev Author: bradtofel Date: 2008-06-24 13:52:20 -0700 (Tue, 24 Jun 2008) Log Message: ----------- INITIAL REV: ObjectFilter<SearchResult> which contacts a remote Exclusion Oracle, and adds a field "ANNOTATION" with the public comment matching each result. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/filters/OracleAnnotationFilter.java 2008-06-24 20:52:20 UTC (rev 2300) @@ -0,0 +1,93 @@ +/* OracleAnnotationFilter + * + * $Id$ + * + * Created on 5:06:29 PM Jun 10, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourceindex.filters; + +import java.util.Date; + +import org.archive.accesscontrol.AccessControlClient; +import org.archive.accesscontrol.RuleOracleUnavailableException; +import org.archive.accesscontrol.model.Rule; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.Timestamp; +import org.archive.wayback.util.ObjectFilter; + +/** + * SearchResult filter class which contacts an access-control Oracle, using + * information from the public comment field to annotate SearchResult objects. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class OracleAnnotationFilter implements ObjectFilter<SearchResult> { + private AccessControlClient client = null; + private String oracleUrl = null; + private String who = null; + /* (non-Javadoc) + * @see org.archive.wayback.util.ObjectFilter#filterObject(java.lang.Object) + */ + public int filterObject(SearchResult o) { + if(client != null) { + String url = o.getAbsoluteUrl(); + Date capDate = Timestamp.parseAfter(o.getCaptureDate()).getDate(); + try { + Rule r = client.getRule(url, capDate, new Date(), who); + if(r != null) { + String publicComment = r.getPublicComment(); + o.put("ANOTATION", publicComment); + } + } catch (RuleOracleUnavailableException e) { + e.printStackTrace(); + // should not happen: we forcibly disable robots retrievals + } + } + return FILTER_INCLUDE; + } + + public AccessControlClient getClient() { + return client; + } + public void setClient(AccessControlClient client) { + client.setRobotLookupsEnabled(false); + this.client = client; + } + + public String getWho() { + return who; + } + + public void setWho(String who) { + this.who = who; + } + + public String getOracleUrl() { + return oracleUrl; + } + + public void 
setOracleUrl(String oracleUrl) { + this.oracleUrl = oracleUrl; + setClient(new AccessControlClient(oracleUrl)); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-24 21:42:41
|
Revision: 2302 http://archive-access.svn.sourceforge.net/archive-access/?rev=2302&view=rev Author: bradtofel Date: 2008-06-24 14:42:44 -0700 (Tue, 24 Jun 2008) Log Message: ----------- BUGFIX (ACC-21): now rewrites all(or at least more) url(XXX) tags in CSS. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2008-06-24 21:09:13 UTC (rev 2301) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2008-06-24 21:42:44 UTC (rev 2302) @@ -71,20 +71,12 @@ + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" + RAW_ATTR_VALUE; -// private static String cssUrlPatString = -// "url\\s*\\(\\s*(['\"]?.+?['\"]?)\\s*\\)"; private static String cssUrlPatString = "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)"; - private static String cssImportPatString = - "@import\\s+" + cssUrlPatString; - private static String cssImportNoUrlPatString = "@import\\s+([\"'].+?[\"'])"; - private static Pattern cssImportPattern = - Pattern.compile(cssImportPatString); - private static Pattern cssImportNoUrlPattern = Pattern.compile(cssImportNoUrlPatString); @@ -161,8 +153,9 @@ public static void markupCSSImports(StringBuilder page, ResultURIConverter uriConverter, String captureDate, String baseUrl) { - markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportPattern); +// markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportPattern); markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportNoUrlPattern); + 
markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssUrlPattern); } public static void markupStyleUrls(StringBuilder page, Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2008-06-24 21:09:13 UTC (rev 2301) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2008-06-24 21:42:44 UTC (rev 2302) @@ -333,6 +333,26 @@ checkCSSMarkup("@import 'http://foo.com/f.css';", "@import 'http://web.archive.org/wayback/2004/http://foo.com/f.css';", "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("@import \"http://foo.com/f.css\"; @import url( http://foo.com/f.css);", + "@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\"; @import url( http://web.archive.org/wayback/2004/http://foo.com/f.css);", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("@import \"http://foo.com/f.css\";\n@import url( http://foo.com/f.css);", + "@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\";\n@import url( http://web.archive.org/wayback/2004/http://foo.com/f.css);", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("@import url( http://foo.com/f.css);\n@import \"http://foo.com/f.css\";", + "@import url( http://web.archive.org/wayback/2004/http://foo.com/f.css);\n@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\";", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("background: #9caad1 url('/~alabama/images/bg.jpg') 0 0 repeat-y;", + "background: #9caad1 url('http://web.archive.org/wayback/2004/http://foo.com/~alabama/images/bg.jpg') 0 0 repeat-y;", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + 
checkCSSMarkup("background: #9caad1 url('/~alabama/images/bg.jpg') 0 0 repeat-y;", + "background: #9caad1 url('http://web.archive.org/wayback/2004/http://foo.com/~alabama/images/bg.jpg') 0 0 repeat-y;", + "http://web.archive.org/wayback/","2004","http://foo.com/b/"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-24 21:09:20
|
Revision: 2301 http://archive-access.svn.sourceforge.net/archive-access/?rev=2301&view=rev Author: bradtofel Date: 2008-06-24 14:09:13 -0700 (Tue, 24 Jun 2008) Log Message: ----------- FEATURE: added stripHTML() method - which is completely untested... perhaps a placeholder. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2008-06-24 20:52:20 UTC (rev 2300) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2008-06-24 21:09:13 UTC (rev 2301) @@ -320,7 +320,11 @@ "REF", "HREF"); } - + public void stripHTML() { + String stripped = sb.toString().replaceAll("\\<.*?>",""); + sb.setLength(0); + sb.append(stripped); + } /** * @param charSet * @throws IOException This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2299 http://archive-access.svn.sourceforge.net/archive-access/?rev=2299&view=rev Author: bradtofel Date: 2008-06-24 13:49:05 -0700 (Tue, 24 Jun 2008) Log Message: ----------- FEATURE: added annotater property, which can be set with an ObjectFilter<SearchResult> to annotate SearchResults as they come out of the ResourceIndex. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-06-20 02:15:20 UTC (rev 2298) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java 2008-06-24 20:49:05 UTC (rev 2299) @@ -81,6 +81,8 @@ private UrlCanonicalizer canonicalizer = null; private boolean dedupeRecords = false; + + private ObjectFilter<SearchResult> annotater = null; public LocalResourceIndex() { canonicalizer = new AggressiveUrlCanonicalizer(); @@ -285,7 +287,9 @@ forwardFilters.addFilter(new WindowEndFilter(resultsPerPage)); // int resultsPerDirection = (int) Math.floor(resultsPerPage / 2); // reverseFilters.addFilter(new WindowEndFilter(resultsPerDirection)); - + if(annotater != null) { + forwardFilters.addFilter(annotater); + } startKey = keyUrl; try { @@ -349,6 +353,9 @@ // add the start and end windowing filters: filters.addFilter(new WindowStartFilter(startResult)); filters.addFilter(new WindowEndFilter(resultsPerPage)); + if(annotater != null) { + filters.addFilter(annotater); + } try { filterRecords(source.getPrefixIterator(startKey), filters, results, true); @@ -384,6 +391,9 @@ // add the start and end windowing filters: filters.addFilter(new 
WindowStartFilter(startResult)); filters.addFilter(new WindowEndFilter(resultsPerPage)); + if(annotater != null) { + filters.addFilter(annotater); + } try { filterRecords(source.getPrefixIterator(startKey), filters, results, true); @@ -468,4 +478,12 @@ public void shutdown() throws IOException { source.shutdown(); } + + public ObjectFilter<SearchResult> getAnnotater() { + return annotater; + } + + public void setAnnotater(ObjectFilter<SearchResult> annotater) { + this.annotater = annotater; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2298 http://archive-access.svn.sourceforge.net/archive-access/?rev=2298&view=rev Author: bradtofel Date: 2008-06-19 19:15:20 -0700 (Thu, 19 Jun 2008) Log Message: ----------- INITIAL REV, REFACTOR: moved renderException from ReplayDispatcher, and ReplayRenderer into this class. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2008-06-20 02:15:20 UTC (rev 2298) @@ -0,0 +1,194 @@ +/* BaseExceptionRenderer + * + * $Id$ + * + * Created on 6:27:28 PM Jun 10, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.exception; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.ExceptionRenderer; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.UIResults; +import org.archive.wayback.core.WaybackRequest; + +/** + * Default implementation responsible for outputting error responses to users + * for expected failure situations, for both Replay and Query requests. + * + * Has logic to return errors as XML, if in query mode, and if user requested + * XML. + * + * Has logic to render errors as CSS, Javascript, and blank images, if the + * request is Replay mode, embedded, and of an obvious type from the request URL + * + * @author brad + * @version $Date$, $Revision$ + */ +public class BaseExceptionRenderer implements ExceptionRenderer { + private String xmlErrorJsp = "/jsp/XMLError.jsp"; + private String errorJsp = "/jsp/HTMLError.jsp"; + private String imageErrorJsp = "/jsp/HTMLError.jsp"; + private String javascriptErrorJsp = "/jsp/JavaScriptError.jsp"; + private String cssErrorJsp = "/jsp/CSSError.jsp"; + + protected final Pattern IMAGE_REGEX = Pattern + .compile(".*\\.(jpg|jpeg|gif|png|bmp|tiff|tif)$"); + + /* ERROR HANDLING RESPONSES: */ + + private boolean requestIsEmbedded(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + // without a wbRequest, assume it is not embedded: send back HTML + if (wbRequest == null) { + return false; + } + String referer = wbRequest.get(WaybackConstants.REQUEST_REFERER_URL); + return (referer != null && 
referer.length() > 0); + } + + private boolean requestIsImage(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + if (requestUrl == null) + return false; + Matcher matcher = IMAGE_REGEX.matcher(requestUrl); + return (matcher != null && matcher.matches()); + } + + private boolean requestIsJavascript(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + return (requestUrl != null) && requestUrl.endsWith(".js"); + } + + private boolean requestIsCSS(HttpServletRequest httpRequest, + WaybackRequest wbRequest) { + + String requestUrl = wbRequest.get(WaybackConstants.REQUEST_URL); + return (requestUrl != null) && requestUrl.endsWith(".css"); + } + + /* (non-Javadoc) + * @see org.archive.wayback.ExceptionRenderer#renderException(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.exception.WaybackException) + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) throws ServletException, IOException { + + // the "standard HTML" response handler: + String finalJspPath = errorJsp; + + if(wbRequest.isQueryRequest()) { + + if(wbRequest.containsKey(WaybackConstants.REQUEST_XML_DATA)) { + finalJspPath = xmlErrorJsp; + } + + } else if (requestIsEmbedded(httpRequest, wbRequest)) { + + // try to not cause client errors by sending the HTML response if + // this request is ebedded, and is obviously one of the special + // types: + + + if (requestIsJavascript(httpRequest, wbRequest)) { + + finalJspPath = javascriptErrorJsp; + + } else if (requestIsCSS(httpRequest, wbRequest)) { + + finalJspPath = cssErrorJsp; + + } else if (requestIsImage(httpRequest, wbRequest)) { + + finalJspPath = imageErrorJsp; + + } + } + + 
httpRequest.setAttribute("exception", exception); + UIResults uiResults = new UIResults(wbRequest); + uiResults.storeInRequest(httpRequest, finalJspPath); + + RequestDispatcher dispatcher = httpRequest + .getRequestDispatcher(finalJspPath); + if(dispatcher == null) { + throw new ServletException("Null dispatcher for " + finalJspPath); + } + dispatcher.forward(httpRequest, httpResponse); + } + + public String getErrorJsp() { + return errorJsp; + } + + public void setErrorJsp(String errorJsp) { + this.errorJsp = errorJsp; + } + + /** + * @return the xmlErrorJsp + */ + public String getXmlErrorJsp() { + return xmlErrorJsp; + } + + /** + * @param xmlErrorJsp the xmlErrorJsp to set + */ + public void setXmlErrorJsp(String xmlErrorJsp) { + this.xmlErrorJsp = xmlErrorJsp; + } + + public String getImageErrorJsp() { + return imageErrorJsp; + } + + public void setImageErrorJsp(String imageErrorJsp) { + this.imageErrorJsp = imageErrorJsp; + } + + public String getJavascriptErrorJsp() { + return javascriptErrorJsp; + } + + public void setJavascriptErrorJsp(String javascriptErrorJsp) { + this.javascriptErrorJsp = javascriptErrorJsp; + } + + public String getCssErrorJsp() { + return cssErrorJsp; + } + + public void setCssErrorJsp(String cssErrorJsp) { + this.cssErrorJsp = cssErrorJsp; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-20 01:43:28
|
Revision: 2297 http://archive-access.svn.sourceforge.net/archive-access/?rev=2297&view=rev Author: bradtofel Date: 2008-06-19 18:43:37 -0700 (Thu, 19 Jun 2008) Log Message: ----------- REFACTOR: moving error rendering responsibility to separate interface, and out of ReplayRenderer. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ExceptionRenderer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ExceptionRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ExceptionRenderer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/ExceptionRenderer.java 2008-06-20 01:43:37 UTC (rev 2297) @@ -0,0 +1,59 @@ +/* ExceptionRenderer + * + * $Id$ + * + * Created on 6:26:05 PM Jun 10, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.exception.WaybackException; + +/** + * Implementors are responsible for drawing errors. + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface ExceptionRenderer { + /** + * Render the contents of a WaybackException in either html, javascript, or + * css format, depending on the guessed context, so errors in embedded + * documents do not cause unneeded errors in the embedding document. + * + * @param httpRequest + * @param httpResponse + * @param wbRequest + * @param exception + * @throws ServletException + * @throws IOException + */ + public void renderException(HttpServletRequest httpRequest, + HttpServletResponse httpResponse, WaybackRequest wbRequest, + WaybackException exception) + throws ServletException, IOException; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-20 01:41:45
|
Revision: 2296 http://archive-access.svn.sourceforge.net/archive-access/?rev=2296&view=rev Author: bradtofel Date: 2008-06-19 18:41:50 -0700 (Thu, 19 Jun 2008) Log Message: ----------- IMPROVEMENT (ACC-25): now we allow "http:/" instead of just "http://" to begin URLs. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2008-06-20 01:28:54 UTC (rev 2295) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2008-06-20 01:41:50 UTC (rev 2296) @@ -348,7 +348,11 @@ */ public void setRequestUrl(String urlStr) throws URIException { if (!urlStr.startsWith("http://")) { - urlStr = "http://" + urlStr; + if(urlStr.startsWith("http:/")) { + urlStr = "http://" + urlStr.substring(6); + } else { + urlStr = "http://" + urlStr; + } } // If its not http, next line throws exception. TODO: Fix. UURI requestURI = UURIFactory.getInstance(urlStr); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2295 http://archive-access.svn.sourceforge.net/archive-access/?rev=2295&view=rev Author: bradtofel Date: 2008-06-19 18:28:54 -0700 (Thu, 19 Jun 2008) Log Message: ----------- TWEAK: removed duplicate logic that already happens in WaybackRequest.setRequestUrl(). Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2008-06-20 01:15:31 UTC (rev 2294) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2008-06-20 01:28:54 UTC (rev 2295) @@ -59,9 +59,6 @@ wbRequest = new WaybackRequest(); String dateStr = matcher.group(1); urlStr = matcher.group(2); - if (!urlStr.startsWith("http://")) { - urlStr = "http://" + urlStr; - } // The logic of the classic WM wrt timestamp bounding: // if 14-digits are specified, assume min-max range boundaries This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2294 http://archive-access.svn.sourceforge.net/archive-access/?rev=2294&view=rev Author: bradtofel Date: 2008-06-19 18:15:31 -0700 (Thu, 19 Jun 2008) Log Message: ----------- BUGFIX(ACC-24): now sets number of results requested on WaybackRequest object. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2008-06-17 17:17:21 UTC (rev 2293) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2008-06-20 01:15:31 UTC (rev 2294) @@ -94,7 +94,7 @@ } wbRequest.put(WaybackConstants.REQUEST_TYPE, WaybackConstants.REQUEST_REPLAY_QUERY); - + wbRequest.setResultsPerPage(maxRecords); return wbRequest; } public List<String> getLocalhostNames() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2293 http://archive-access.svn.sourceforge.net/archive-access/?rev=2293&view=rev Author: binzino Date: 2008-06-17 10:17:21 -0700 (Tue, 17 Jun 2008) Log Message: ----------- Fixed typo in comment. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java Modified: trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java 2008-06-10 23:59:20 UTC (rev 2292) +++ trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java 2008-06-17 17:17:21 UTC (rev 2293) @@ -145,7 +145,7 @@ true ); // Set boost on range query? - // range.setBoolst( boost ); + // range.setBoost( boost ); output.add( range, ( clause.isProhibited() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-11 00:00:57
|
Revision: 2292 http://archive-access.svn.sourceforge.net/archive-access/?rev=2292&view=rev Author: bradtofel Date: 2008-06-10 16:59:20 -0700 (Tue, 10 Jun 2008) Log Message: ----------- TWEAK: closed <a> tag for SURT context help reference link. Modified Paths: -------------- trunk/archive-access/projects/access-control/oracle/src/main/webapp/WEB-INF/views/list_rules.jsp Modified: trunk/archive-access/projects/access-control/oracle/src/main/webapp/WEB-INF/views/list_rules.jsp =================================================================== --- trunk/archive-access/projects/access-control/oracle/src/main/webapp/WEB-INF/views/list_rules.jsp 2008-06-10 23:58:13 UTC (rev 2291) +++ trunk/archive-access/projects/access-control/oracle/src/main/webapp/WEB-INF/views/list_rules.jsp 2008-06-10 23:59:20 UTC (rev 2292) @@ -1,9 +1,15 @@ <%@ include file="header.inc"%> <form action="<c:url value="/admin"/>" method="GET" id="navForm"> -<label for="surtNavBox"><a href="http://crawler.archive.org/articles/user_manual/glossary.html#surt"><acronym title="Sort-friendly URI Reordering Transform">SURT</acronym> or URL:</label> <input size="50" - name="surt" value="<c:out value="${surt}"/>" id="surtNavBox" /> <input - type="submit" value="Go!" /></form> + <label for="surtNavBox"> + <a href="http://crawler.archive.org/articles/user_manual/glossary.html#surt"> + <acronym title="Sort-friendly URI Reordering Transform">SURT</acronym> + or URL: + </a> + </label> + <input size="50" name="surt" value="<c:out value="${surt}"/>" id="surtNavBox" /> + <input type="submit" value="Go!" /> +</form> <div id="breadcrumbsContainer"> <ul id="breadcrumbs"> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2291 http://archive-access.svn.sourceforge.net/archive-access/?rev=2291&view=rev Author: bradtofel Date: 2008-06-10 16:58:13 -0700 (Tue, 10 Jun 2008) Log Message: ----------- BUGFIX (unreported): using String.equals() not "==" when comparing policy "who" match. Modified Paths: -------------- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java 2008-06-10 23:57:11 UTC (rev 2290) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java 2008-06-10 23:58:13 UTC (rev 2291) @@ -3,7 +3,6 @@ import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; -import java.util.SortedSet; import org.apache.commons.lang.ArrayUtils; @@ -348,7 +347,8 @@ * @return */ public boolean matches(String surt, Date captureDate, Date retrievalDate, String who2) { - return (who == null || "".equals(who) || who == who2) && matches(surt, captureDate, retrievalDate); + return (who == null || who.length() == 0 || who.equals(who2)) + && matches(surt, captureDate, retrievalDate); } public boolean isExactMatch() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2290 http://archive-access.svn.sourceforge.net/archive-access/?rev=2290&view=rev Author: bradtofel Date: 2008-06-10 16:57:11 -0700 (Tue, 10 Jun 2008) Log Message: ----------- TWEAK: removed unused 'done' variable. Modified Paths: -------------- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java 2008-06-10 21:36:32 UTC (rev 2289) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java 2008-06-10 23:57:11 UTC (rev 2290) @@ -68,7 +68,6 @@ Date retrievalDate, String who) { NewSurtTokenizer tok = new NewSurtTokenizer(surt); - boolean done = false; for (String key: tok.getSearchList()) { Iterable<Rule> rules = rulemap.get(key); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2008-06-10 21:36:36
|
Revision: 2289 http://archive-access.svn.sourceforge.net/archive-access/?rev=2289&view=rev Author: binzino Date: 2008-06-10 14:36:32 -0700 (Tue, 10 Jun 2008) Log Message: ----------- Added GroupedQueryFilter, essentially a RawFieldQueryFilter that allows for multiple values. Added use thereof to conf/nutch-site.xml Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/ConfigurableQueryFilter.java Modified: trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml =================================================================== --- trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml 2008-06-05 22:49:14 UTC (rev 2288) +++ trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml 2008-06-10 21:36:32 UTC (rev 2289) @@ -33,17 +33,21 @@ <property> <!-- Configure the 'query-nutchwax' plugin. Specify which fields to make searchable via "field:[term|phrase]" query syntax, and whether they are "raw" fields or not. - The specification format is "raw:name:lowercase:boost" or "field:name:boost". Default values are + The specification format is one of: + field:<name>:<boost> + raw:<name>:<lowercase>:<boost> + group:<name>:<lowercase>:<delimiter>:<boost> + Default values are lowercase = true + delimiter = "," boost = 1.0f - There is no "lowercase" property for "field" specification because the Nutch FieldQueryFilter doesn't expose the option, unlike the RawFieldQueryFilter. - AFAICT, the order isn't important. 
--> + --> <!-- We do *not* use this filter for handling "date" queries, there is a specific filter for that: DateQueryFilter --> <name>nutchwax.filter.query</name> <value> raw:arcname:false - raw:collection - raw:type + group:collection + group:type field:anchor field:content field:host Modified: trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/ConfigurableQueryFilter.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/ConfigurableQueryFilter.java 2008-06-05 22:49:14 UTC (rev 2288) +++ trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/ConfigurableQueryFilter.java 2008-06-10 21:36:32 UTC (rev 2289) @@ -24,10 +24,14 @@ import java.util.ArrayList; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.index.Term; import org.apache.nutch.searcher.QueryFilter; import org.apache.nutch.searcher.QueryException; import org.apache.nutch.searcher.Query; +import org.apache.nutch.searcher.Query.Clause; import org.apache.nutch.searcher.FieldQueryFilter; import org.apache.nutch.searcher.RawFieldQueryFilter; import org.apache.hadoop.conf.Configuration; @@ -130,10 +134,40 @@ // TODO: Warning, but ignore it. 
} } + QueryFilter filter = new RawFieldQueryFilterImpl( name, lowerCase, boost ); - + this.filters.add( filter ); } + else if ( "group".equals( spec[0] ) ) + { + String name = spec[1]; + boolean lowerCase = true; + String delimiter = ","; + float boost = 1.0f; + if ( spec.length > 2 ) + { + lowerCase = Boolean.parseBoolean( spec[2] ); + } + if ( spec.length > 3 ) + { + delimiter = spec[3]; + } + if ( spec.length > 4 ) + { + try + { + boost = Float.parseFloat( spec[4] ); + } + catch ( NumberFormatException nfe ) + { + // TODO: Warning, but ignore it. + } + } + QueryFilter filter = new GroupedQueryFilter( name, delimiter, lowerCase, boost ); + + this.filters.add( filter ); + } else { // TODO: Warning uknown filter type @@ -175,4 +209,78 @@ } } + public class GroupedQueryFilter implements QueryFilter + { + private String field; + private String delimiter; + private boolean lowerCase; + private float boost; + private Configuration conf; + + /** Construct for the named field, potentially lowercasing query values.*/ + public GroupedQueryFilter( String field, String delimiter, boolean lowerCase, float boost ) + { + this.field = field; + this.delimiter = delimiter; + this.lowerCase = lowerCase; + this.boost = boost; + + // Use the same conf as the owning instance. 
+ this.setConf( ConfigurableQueryFilter.this.conf ); + } + + public BooleanQuery filter( Query input, BooleanQuery output ) + throws QueryException + { + // examine each clause in the Nutch query + for ( Clause c : input.getClauses() ) + { + // skip non-matching clauses + if ( !c.getField( ).equals( field ) ) continue; + + // get the field value from the clause + // raw fields are guaranteed to be Terms, not Phrases + String values = c.getTerm().toString(); + + BooleanQuery group = new BooleanQuery( output.isCoordDisabled( ) ); + for ( String value : values.split( this.delimiter ) ) + { + if (lowerCase) value = value.toLowerCase(); + + // Create a Lucene TermQuery for this value + TermQuery term = new TermQuery( new Term( field, value ) ); + + term.setBoost(boost); + + // Add it to the group + group.add( term, BooleanClause.Occur.SHOULD ); + } + + // Finally add the group to the overall query. The group's + // must/not/should is taken from the original Nutch clause + // with the multiple values. + output.add( group, (c.isProhibited() + ? BooleanClause.Occur.MUST_NOT + : (c.isRequired() + ? BooleanClause.Occur.MUST + : BooleanClause.Occur.SHOULD + ) + )); + } + + // return the modified Lucene query + return output; + } + + public void setConf( Configuration conf ) + { + this.conf = conf; + } + + public Configuration getConf( ) + { + return this.conf; + } + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-05 22:49:08
|
Revision: 2288 http://archive-access.svn.sourceforge.net/archive-access/?rev=2288&view=rev Author: bradtofel Date: 2008-06-05 15:49:14 -0700 (Thu, 05 Jun 2008) Log Message: ----------- MOVED: Tests to conform with new packages, class names, and method names. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDBTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/ResoruceFileLocationDBLogTest.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java 2008-06-05 21:58:07 UTC (rev 2287) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java 2008-06-05 22:49:14 UTC (rev 2288) @@ -1,95 +0,0 @@ -/* FileLocationDBLogTest - * - * $Id$ - * - * Created on 4:54:04 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. 
- * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.File; -import java.util.Iterator; -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBLogTest extends TestCase { - FileLocationDBLog log; - protected void setUp() throws Exception { - super.setUp(); - File tmp = File.createTempFile("fldb","log"); - - log = new FileLocationDBLog(tmp.getAbsolutePath()); - } - - /* - * @see TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - if(!log.delete()) { - fail("FAILED to delete tmp file"); - } - } - - /** - * @throws Exception - */ - public void testEmptyFile() throws Exception { - String newArc1 = "foo.arc.gz"; - String newArc2 = "bar.arc.gz"; - long mark1 = log.getCurrentMark(); - assertEquals(mark1,0); - Iterator<String> itr = log.getArcsBetweenMarks(0,0); - assertFalse(itr.hasNext()); - log.addArc(newArc1); - long mark2 = log.getCurrentMark(); - assertEquals(newArc1.length() + 1,mark2); - itr = log.getArcsBetweenMarks(mark1,mark2); - assertTrue(itr.hasNext()); - String gotArc = (String) itr.next(); - assertFalse(itr.hasNext()); - assertTrue(newArc1.equals(gotArc)); - log.addArc(newArc2); - long mark3 = log.getCurrentMark(); - assertEquals(newArc1.length() + newArc2.length() + 2, mark3); - - itr = log.getArcsBetweenMarks(mark2,mark3); - assertTrue(itr.hasNext()); - gotArc = (String) itr.next(); - assertFalse(itr.hasNext()); - assertTrue(newArc2.equals(gotArc)); - - itr = 
log.getArcsBetweenMarks(mark1,mark3); - assertTrue(itr.hasNext()); - gotArc = (String) itr.next(); - assertTrue(newArc1.equals(gotArc)); - - assertTrue(itr.hasNext()); - gotArc = (String) itr.next(); - assertTrue(newArc2.equals(gotArc)); - - assertFalse(itr.hasNext()); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java 2008-06-05 21:58:07 UTC (rev 2287) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java 2008-06-05 22:49:14 UTC (rev 2288) @@ -1,199 +0,0 @@ -/* FileLocationDBTest - * - * $Id$ - * - * Created on 5:17:23 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; - -import org.archive.wayback.resourcestore.http.FileLocationDB; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBTest extends TestCase { - private FileLocationDB db = null; - private String dbPath = null; - private String dbName = null; - private File tmpFile = null; - private File tmpLogFile = null; - - /* - * @see TestCase#setUp() - */ - protected void setUp() throws Exception { - - tmpFile = File.createTempFile("test-FileLocationDB",".tmp"); - tmpLogFile = File.createTempFile("test-FileLocationDB",".log"); - assertTrue(tmpFile.delete()); - assertTrue(tmpFile.mkdirs()); - dbPath = tmpFile.getAbsolutePath(); - dbName = "test-FileLocationDB"; - db = new FileLocationDB(); - - db.setBdbName(dbName); - db.setBdbPath(dbPath); - db.setLogPath(tmpLogFile.getAbsolutePath()); - db.init(); - - super.setUp(); - } - - /* - * @see TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - db.shutdownDB(); - if(tmpFile.isDirectory()) { - File files[] = tmpFile.listFiles(); - for(int i = 0; i < files.length; i++) { - assertTrue(files[i].isFile()); - assertTrue(files[i].delete()); - } - assertTrue(tmpFile.delete()); - } - assertTrue(tmpLogFile.delete()); - } - - private void testMarkLength(long start, long end, int count) throws IOException { - Iterator<String> itr = db.getArcsBetweenMarks(start,end); - int found = 0; - while(itr.hasNext()) { - itr.next(); - found++; - } - assertEquals(count,found); - } - - /** - * Test method for 'org.archive.wayback.http11resourcestore.FileLocationDB - */ - public void testDB() { - 
assertNotNull(db); - String urls[] = null; - try { - // empty results OK: - urls = db.arcToUrls("arc1"); - assertNull(urls); - //assertEquals(urls.length,0); - testMarkLength(0,0,0); - - // add an URL, and get it back: - db.addArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url1",urls[0]); - testMarkLength(0,5,1); - - // add the same URL again, verify only comes back once: - db.addArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url1",urls[0]); - testMarkLength(0,5,1); - - // check for empty results for a diff arc: - urls = db.arcToUrls("arc2"); - assertNull(urls); - //assertEquals(urls.length,0); - - // add a diff URL for first arc, verify both come back: - db.addArcUrl("arc1","url2"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(2,urls.length); - assertEquals("url1",urls[0]); - assertEquals("url2",urls[1]); - testMarkLength(0,5,1); - - // still nothing for arc2: - urls = db.arcToUrls("arc2"); - assertNull(urls); - //assertEquals(urls.length,0); - - // add an URL for arc2, and get it back: - db.addArcUrl("arc2","url2-1"); - urls = db.arcToUrls("arc2"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2-1",urls[0]); - testMarkLength(0,10,2); - testMarkLength(5,10,1); - - // remove unknown URL for arc2 - db.removeArcUrl("arc2","url2-2"); - urls = db.arcToUrls("arc2"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2-1",urls[0]); - - // remove the right URL for arc2 - db.removeArcUrl("arc2","url2-1"); - urls = db.arcToUrls("arc2"); - assertNull(urls); - //assertEquals(urls.length,0); - - // remove non-existant URL for first arc, verify two still come back - db.removeArcUrl("arc1","url-non"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(2,urls.length); - assertEquals("url1",urls[0]); - assertEquals("url2",urls[1]); - - // 
remove a right URL for arc1 - db.removeArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2",urls[0]); - - // remove a now wrong URL for arc1 - db.removeArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2",urls[0]); - - // remove a last URL for arc1 - db.removeArcUrl("arc1","url2"); - urls = db.arcToUrls("arc1"); - assertNull(urls); - //assertEquals(urls.length,0); - - } catch (Exception e) { - fail("arcToUrls threw " + e.getMessage()); - } - - } -} Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDBTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDBTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDBTest.java 2008-06-05 22:49:14 UTC (rev 2288) @@ -0,0 +1,199 @@ +/* FileLocationDBTest + * + * $Id: FileLocationDBTest.java 1856 2007-07-25 00:17:15Z bradtofel $ + * + * Created on 5:17:23 PM Aug 21, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +import org.archive.wayback.resourcestore.locationdb.BDBResourceFileLocationDB; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date: 2007-07-24 17:17:15 -0700 (Tue, 24 Jul 2007) $, $Revision: 1856 $ + */ +public class BDBResourceFileLocationDBTest extends TestCase { + private BDBResourceFileLocationDB db = null; + private String dbPath = null; + private String dbName = null; + private File tmpFile = null; + private File tmpLogFile = null; + + /* + * @see TestCase#setUp() + */ + protected void setUp() throws Exception { + + tmpFile = File.createTempFile("test-FileLocationDB",".tmp"); + tmpLogFile = File.createTempFile("test-FileLocationDB",".log"); + assertTrue(tmpFile.delete()); + assertTrue(tmpFile.mkdirs()); + dbPath = tmpFile.getAbsolutePath(); + dbName = "test-FileLocationDB"; + db = new BDBResourceFileLocationDB(); + + db.setBdbName(dbName); + db.setBdbPath(dbPath); + db.setLogPath(tmpLogFile.getAbsolutePath()); + db.init(); + + super.setUp(); + } + + /* + * @see TestCase#tearDown() + */ + protected void tearDown() throws Exception { + super.tearDown(); + db.shutdown(); + if(tmpFile.isDirectory()) { + File files[] = tmpFile.listFiles(); + for(int i = 0; i < files.length; i++) { + assertTrue(files[i].isFile()); + assertTrue(files[i].delete()); + } + assertTrue(tmpFile.delete()); + } + assertTrue(tmpLogFile.delete()); + } + + private void testMarkLength(long start, long end, int count) throws IOException { + Iterator<String> itr = db.getNamesBetweenMarks(start,end); + int found = 0; + while(itr.hasNext()) { + itr.next(); + found++; + } + assertEquals(count,found); + } + + /** + * 
Test method for 'org.archive.wayback.http11resourcestore.FileLocationDB + */ + public void testDB() { + assertNotNull(db); + String urls[] = null; + try { + // empty results OK: + urls = db.nameToUrls("arc1"); + assertNull(urls); + //assertEquals(urls.length,0); + testMarkLength(0,0,0); + + // add an URL, and get it back: + db.addNameUrl("arc1","url1"); + urls = db.nameToUrls("arc1"); + assertNotNull(urls); + assertEquals(1,urls.length); + assertEquals("url1",urls[0]); + testMarkLength(0,5,1); + + // add the same URL again, verify only comes back once: + db.addNameUrl("arc1","url1"); + urls = db.nameToUrls("arc1"); + assertNotNull(urls); + assertEquals(1,urls.length); + assertEquals("url1",urls[0]); + testMarkLength(0,5,1); + + // check for empty results for a diff arc: + urls = db.nameToUrls("arc2"); + assertNull(urls); + //assertEquals(urls.length,0); + + // add a diff URL for first arc, verify both come back: + db.addNameUrl("arc1","url2"); + urls = db.nameToUrls("arc1"); + assertNotNull(urls); + assertEquals(2,urls.length); + assertEquals("url1",urls[0]); + assertEquals("url2",urls[1]); + testMarkLength(0,5,1); + + // still nothing for arc2: + urls = db.nameToUrls("arc2"); + assertNull(urls); + //assertEquals(urls.length,0); + + // add an URL for arc2, and get it back: + db.addNameUrl("arc2","url2-1"); + urls = db.nameToUrls("arc2"); + assertNotNull(urls); + assertEquals(1,urls.length); + assertEquals("url2-1",urls[0]); + testMarkLength(0,10,2); + testMarkLength(5,10,1); + + // remove unknown URL for arc2 + db.removeNameUrl("arc2","url2-2"); + urls = db.nameToUrls("arc2"); + assertNotNull(urls); + assertEquals(1,urls.length); + assertEquals("url2-1",urls[0]); + + // remove the right URL for arc2 + db.removeNameUrl("arc2","url2-1"); + urls = db.nameToUrls("arc2"); + assertNull(urls); + //assertEquals(urls.length,0); + + // remove non-existant URL for first arc, verify two still come back + db.removeNameUrl("arc1","url-non"); + urls = db.nameToUrls("arc1"); + 
assertNotNull(urls); + assertEquals(2,urls.length); + assertEquals("url1",urls[0]); + assertEquals("url2",urls[1]); + + // remove a right URL for arc1 + db.removeNameUrl("arc1","url1"); + urls = db.nameToUrls("arc1"); + assertNotNull(urls); + assertEquals(1,urls.length); + assertEquals("url2",urls[0]); + + // remove a now wrong URL for arc1 + db.removeNameUrl("arc1","url1"); + urls = db.nameToUrls("arc1"); + assertNotNull(urls); + assertEquals(1,urls.length); + assertEquals("url2",urls[0]); + + // remove a last URL for arc1 + db.removeNameUrl("arc1","url2"); + urls = db.nameToUrls("arc1"); + assertNull(urls); + //assertEquals(urls.length,0); + + } catch (Exception e) { + fail("arcToUrls threw " + e.getMessage()); + } + + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/ResoruceFileLocationDBLogTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/ResoruceFileLocationDBLogTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/locationdb/ResoruceFileLocationDBLogTest.java 2008-06-05 22:49:14 UTC (rev 2288) @@ -0,0 +1,98 @@ +/* FileLocationDBLogTest + * + * $Id: FileLocationDBLogTest.java 1856 2007-07-25 00:17:15Z bradtofel $ + * + * Created on 4:54:04 PM Aug 21, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.File; +import java.util.Iterator; + +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBLog; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date: 2007-07-24 17:17:15 -0700 (Tue, 24 Jul 2007) $, $Revision: 1856 $ + */ +public class ResoruceFileLocationDBLogTest extends TestCase { + ResourceFileLocationDBLog log; + protected void setUp() throws Exception { + super.setUp(); + File tmp = File.createTempFile("fldb","log"); + + log = new ResourceFileLocationDBLog(tmp.getAbsolutePath()); + } + + /* + * @see TestCase#tearDown() + */ + protected void tearDown() throws Exception { + super.tearDown(); + if(!log.delete()) { + fail("FAILED to delete tmp file"); + } + } + + /** + * @throws Exception + */ + public void testEmptyFile() throws Exception { + String newArc1 = "foo.arc.gz"; + String newArc2 = "bar.arc.gz"; + long mark1 = log.getCurrentMark(); + assertEquals(mark1,0); + Iterator<String> itr = log.getNamesBetweenMarks(0,0); + assertFalse(itr.hasNext()); + log.addName(newArc1); + long mark2 = log.getCurrentMark(); + assertEquals(newArc1.length() + 1,mark2); + itr = log.getNamesBetweenMarks(mark1,mark2); + assertTrue(itr.hasNext()); + String gotArc = (String) itr.next(); + assertFalse(itr.hasNext()); + assertTrue(newArc1.equals(gotArc)); + log.addName(newArc2); + long mark3 = log.getCurrentMark(); + assertEquals(newArc1.length() + newArc2.length() + 2, mark3); + + itr = log.getNamesBetweenMarks(mark2,mark3); + assertTrue(itr.hasNext()); + gotArc = (String) itr.next(); + assertFalse(itr.hasNext()); + assertTrue(newArc2.equals(gotArc)); + + itr = 
log.getNamesBetweenMarks(mark1,mark3); + assertTrue(itr.hasNext()); + gotArc = (String) itr.next(); + assertTrue(newArc1.equals(gotArc)); + + assertTrue(itr.hasNext()); + gotArc = (String) itr.next(); + assertTrue(newArc2.equals(gotArc)); + + assertFalse(itr.hasNext()); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-05 21:58:00
|
Revision: 2287 http://archive-access.svn.sourceforge.net/archive-access/?rev=2287&view=rev Author: bradtofel Date: 2008-06-05 14:58:07 -0700 (Thu, 05 Jun 2008) Log Message: ----------- INITIAL REV: failed to add new ResourceFileLocationDB interface last checkin. Changed log append method name to addName() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2008-06-05 21:52:54 UTC (rev 2286) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2008-06-05 21:58:07 UTC (rev 2287) @@ -29,8 +29,8 @@ import java.io.InputStreamReader; import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.resourcestore.locationdb2.ResourceFileLocationDB; -import org.archive.wayback.resourcestore.locationdb2.ResourceFileLocationDBLog; +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBLog; import org.archive.wayback.util.CloseableIterator; import com.sleepycat.je.DatabaseException; @@ -146,7 +146,7 @@ } else { // null or empty value newValue = url; - if(oldValue == null) log.addFile(name); + if(oldValue == 
null) log.addName(name); } // did we find a value? Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java 2008-06-05 21:58:07 UTC (rev 2287) @@ -0,0 +1,59 @@ +/* ResourceFileLocationDB + * + * $Id$ + * + * Created on 2:01:29 PM Jun 5, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.IOException; + +import org.archive.wayback.util.CloseableIterator; + +/** + * Interface to a database that maps file key Strings to zero or more value + * Strings. Additionally, the database supports a "getCurrentMark" call that + * will return a long value.
The results of two independent calls to + * getCurrentMark() can be passed to getNamesBetweenMarks() to retrieve an + * Iterator listing all key Strings added to the database between the two calls + * to getCurrentMark() + * + * @author brad + * @version $Date$, $Revision$ + */ +public interface ResourceFileLocationDB { + + public void shutdown() throws IOException; + + public String[] nameToUrls(final String name) + throws IOException; + + public void addNameUrl(final String name, final String url) + throws IOException; + + public void removeNameUrl(final String name, final String url) + throws IOException; + + public CloseableIterator<String> getNamesBetweenMarks(long start, long end) + throws IOException; + + public long getCurrentMark() throws IOException; +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2008-06-05 21:52:54 UTC (rev 2286) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2008-06-05 21:58:07 UTC (rev 2287) @@ -31,7 +31,6 @@ import java.io.IOException; import java.io.RandomAccessFile; -import org.archive.wayback.exception.ConfigurationException; import org.archive.wayback.util.CloseableIterator; import org.archive.wayback.util.flatfile.RecordIterator; @@ -55,23 +54,23 @@ /** * @param pathname - * @throws ConfigurationException + * @throws IOException */ - public ResourceFileLocationDBLog(String pathname) throws ConfigurationException { + public ResourceFileLocationDBLog(String pathname) throws IOException { super(pathname); if (!isFile()) { if (exists()) { - throw new ConfigurationException("path(" + pathname + throw new IOException("path(" 
+ pathname + ") exists but is not a file!"); } try { if (!createNewFile()) { - throw new ConfigurationException( + throw new IOException( "Unable to create empty file " + pathname); } } catch (IOException e) { e.printStackTrace(); - throw new ConfigurationException("Unable to create empty file " + throw new IOException("Unable to create empty file " + pathname); } } @@ -87,10 +86,10 @@ /** * @param start * @param end - * @return CleanableIterator that returns all arcs between start and end + * @return CleanableIterator that returns all names between start and end * @throws IOException */ - public CloseableIterator<String> getArcsBetweenMarks(long start, long end) + public CloseableIterator<String> getNamesBetweenMarks(long start, long end) throws IOException { RandomAccessFile raf = new RandomAccessFile(this, "r"); @@ -100,12 +99,12 @@ } /** - * @param arcName + * @param name * @throws IOException */ - public synchronized void addArc(String arcName) throws IOException { + public synchronized void addName(String name) throws IOException { FileWriter writer = new FileWriter(this, true); - writer.write(arcName + "\n"); + writer.write(name + "\n"); writer.flush(); writer.close(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-05 21:52:47
|
Revision: 2286 http://archive-access.svn.sourceforge.net/archive-access/?rev=2286&view=rev Author: bradtofel Date: 2008-06-05 14:52:54 -0700 (Thu, 05 Jun 2008) Log Message: ----------- INTERFACE: changed ResourceFileLocationDB to an interface, with method not specific to ARCs, created BDB and remote implementations of that interface. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java (from rev 2284, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/BDBResourceFileLocationDB.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -0,0 +1,316 @@ +/* BDBResourceFileLocationDB + * + * $Id$ + * + * Created on 3:08:59 PM Aug 18, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.archive.wayback.bdb.BDBRecordSet; +import org.archive.wayback.resourcestore.locationdb2.ResourceFileLocationDB; +import org.archive.wayback.resourcestore.locationdb2.ResourceFileLocationDBLog; +import org.archive.wayback.util.CloseableIterator; + +import com.sleepycat.je.DatabaseException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class BDBResourceFileLocationDB implements ResourceFileLocationDB { + + private final static String urlDelimiter = " "; + + private final static String urlDelimiterRE = " "; + + private ResourceFileLocationDBLog log; + private BDBRecordSet bdb = null; + private String logPath = null; + private String bdbPath = null; + private String bdbName = null; + + private IOException 
wrapDBException(DatabaseException e) { + return new IOException(e.getLocalizedMessage()); + } + private String get(String key) throws IOException { + try { + return bdb.get(key); + } catch (DatabaseException e) { + throw wrapDBException(e); + } + } + private void put(String key, String value) throws IOException { + try { + bdb.put(key,value); + } catch (DatabaseException e) { + throw wrapDBException(e); + } + } + private void delete(String key) throws IOException { + try { + bdb.delete(key); + } catch (DatabaseException e) { + throw wrapDBException(e); + } + } + public void shutdown() throws IOException { + try { + bdb.shutdownDB(); + } catch (DatabaseException e) { + throw wrapDBException(e); + } + } + + public void init() throws IOException { + if(logPath == null) { + throw new IOException("No logPath"); + } + log = new ResourceFileLocationDBLog(logPath); + bdb = new BDBRecordSet(); + try { + bdb.initializeDB(bdbPath,bdbName); + } catch (DatabaseException e) { + throw wrapDBException(e); + } + } + + /** + * return an array of String URLs for all known locations of name in the DB. + * @param name + * @return String[] of URLs to name + * @throws IOException + */ + public String[] nameToUrls(final String name) throws IOException { + + String[] urls = null; + String valueString = get(name); + if(valueString != null && valueString.length() > 0) { + urls = valueString.split(urlDelimiterRE); + } + return urls; + } + + /** + * add an url location for a name, unless it already exists + * @param name + * @param url + * @throws IOException + */ + public void addNameUrl(final String name, final String url) + throws IOException { + + // need to first see if there is already an entry for this name. + // if not, add url as the value. + // if so, check the current url locations for name + // if url exists, do nothing + // if url does not exist, add, and set that as the value. 
+ + String newValue = null; + String oldValue = get(name); + if(oldValue != null && oldValue.length() > 0) { + String curUrls[] = oldValue.split(urlDelimiterRE); + boolean found = false; + for(int i=0; i < curUrls.length; i++) { + if(url.equals(curUrls[i])) { + found = true; + break; + } + } + if(found == false) { + newValue = oldValue + " " + url; + } + } else { + // null or empty value + newValue = url; + if(oldValue == null) log.addFile(name); + } + + // did we find a value? + if(newValue != null) { + put(name,newValue); + } + } + + /** + * remove a single url location for an name, if it exists + * @param name + * @param url + * @throws IOException + */ + public void removeNameUrl(final String name, final String url) + throws IOException { + // need to first see if there is already an entry for this name. + // if not, do nothing + // if so, loop thru all current url locations for name + // keep any that are not url + // if any locations are left, update to the new value, sans url + // if none are left, remove the entry from the db + + StringBuilder newValue = new StringBuilder(); + String oldValue = get(name); + if(oldValue != null && oldValue.length() > 0) { + String curUrls[] = oldValue.split(urlDelimiterRE); + + for(int i=0; i < curUrls.length; i++) { + if(!url.equals(curUrls[i])) { + if(newValue.length() > 0) { + newValue.append(urlDelimiter); + } + newValue.append(curUrls[i]); + } + } + + if(newValue.length() > 0) { + + // update + put(name, newValue.toString()); + + } else { + + // remove the entry: + delete(name); + } + } + } + + /** + * @param start + * @param end + * @return Iterator for traversing arcs between start and end. + * @throws IOException + */ + public CloseableIterator<String> getNamesBetweenMarks(long start, long end) + throws IOException { + return log.getNamesBetweenMarks(start, end); + } + + /** + * @return current "Mark" for the log. Currently, it's just the length of + * the log file. 
+ */ + public long getCurrentMark() { + return log.getCurrentMark(); + } + + /** + * @return the logPath + */ + public String getLogPath() { + return logPath; + } + + /** + * @param logPath the logPath to set + */ + public void setLogPath(String logPath) { + this.logPath = logPath; + } + + /** + * @return the bdbPath + */ + public String getBdbPath() { + return bdbPath; + } + + /** + * @param bdbPath the bdbPath to set + */ + public void setBdbPath(String bdbPath) { + this.bdbPath = bdbPath; + } + + /** + * @return the bdbName + */ + public String getBdbName() { + return bdbName; + } + + /** + * @param bdbName the bdbName to set + */ + public void setBdbName(String bdbName) { + this.bdbName = bdbName; + } + private static void USAGE(String message) { + System.err.print("USAGE: " + message + "\n" + + "\tDBDIR DBNAME LOGPATH\n" + + "\n" + + "\t\tread lines from STDIN formatted like:\n" + + "\t\t\tNAME<SPACE>URL\n" + + "\t\tand for each line, add to locationDB that file NAME is\n" + + "\t\tlocated at URL. 
Use locationDB in DBDIR at DBNAME, \n" + + "\t\tcreating if it does not exist.\n" + ); + System.exit(2); + } + + /** + * @param args + */ + public static void main(String[] args) { + if(args.length != 3) { + USAGE(""); + System.exit(1); + } + String bdbPath = args[0]; + String bdbName = args[1]; + String logPath = args[2]; + BDBResourceFileLocationDB db = new BDBResourceFileLocationDB(); + db.setBdbPath(bdbPath); + db.setBdbName(bdbName); + db.setLogPath(logPath); + BufferedReader r = new BufferedReader( + new InputStreamReader(System.in)); + String line; + int exitCode = 0; + try { + db.init(); + while((line = r.readLine()) != null) { + String parts[] = line.split(" "); + if(parts.length != 2) { + System.err.println("Bad input(" + line + ")"); + System.exit(2); + } + db.addNameUrl(parts[0],parts[1]); + System.out.println("Added\t" + parts[0] + "\t" + parts[1]); + } + } catch (IOException e) { + e.printStackTrace(); + exitCode = 1; + } finally { + try { + db.shutdown(); + } catch (IOException e) { + e.printStackTrace(); + exitCode = 1; + } + } + System.exit(exitCode); + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2008-06-05 21:37:36 UTC (rev 2285) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -29,20 +29,19 @@ import java.io.OutputStream; import java.net.URL; import java.net.URLConnection; -import java.text.ParseException; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import 
org.archive.wayback.webapp.ServletRequestContext; -import com.sleepycat.je.DatabaseException; - /** + * ServletRequestContext interface which uses a ResourceFileLocationDB to + * reverse proxy an incoming HTTP request for a file by name to its actual + * back-end location. This will also forward HTTP byte range requests to the + * final location. * - * * @author brad * @version $Date$, $Revision$ */ @@ -61,49 +60,46 @@ HttpServletResponse httpResponse) throws IOException, ServletException { - try { - String arc = httpRequest.getRequestURI(); - arc = arc.substring(arc.lastIndexOf('/')+1); - if(arc.length() == 0) { - throw new ParseException("no/invalid arc",0); - } - String urls[] = locationDB.arcToUrls(arc); + String name = httpRequest.getRequestURI(); + name = name.substring(name.lastIndexOf('/')+1); + if(name.length() == 0) { + httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, + "no/invalid name"); + } else { + + String urls[] = locationDB.nameToUrls(name); + if(urls == null || urls.length == 0) { - throw new DatabaseException("Unable to locate("+arc+")"); - } - String urlString = urls[0]; - String rangeHeader = httpRequest.getHeader(RANGE_HTTP_HEADER); - URL url = new URL(urlString); - URLConnection conn = url.openConnection(); - if(rangeHeader != null) { - conn.addRequestProperty(RANGE_HTTP_HEADER,rangeHeader); - } - InputStream is = conn.getInputStream(); - httpResponse.setStatus(HttpServletResponse.SC_OK); - String typeHeader = conn.getHeaderField(CONTENT_TYPE_HEADER); - if(typeHeader == null) { - typeHeader = CONTENT_TYPE; - } - httpResponse.setContentType(typeHeader); - OutputStream os = httpResponse.getOutputStream(); - int BUF_SIZE = 4096; - byte[] buffer = new byte[BUF_SIZE]; - try { - for (int r = -1; (r = is.read(buffer, 0, BUF_SIZE)) != -1;) { - os.write(buffer, 0, r); + + httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, + "Unable to locate("+name+")"); + } else { + + String urlString = urls[0]; + String rangeHeader = 
httpRequest.getHeader(RANGE_HTTP_HEADER); + URL url = new URL(urlString); + URLConnection conn = url.openConnection(); + if(rangeHeader != null) { + conn.addRequestProperty(RANGE_HTTP_HEADER,rangeHeader); } - } finally { - is.close(); + InputStream is = conn.getInputStream(); + httpResponse.setStatus(HttpServletResponse.SC_OK); + String typeHeader = conn.getHeaderField(CONTENT_TYPE_HEADER); + if(typeHeader == null) { + typeHeader = CONTENT_TYPE; + } + httpResponse.setContentType(typeHeader); + OutputStream os = httpResponse.getOutputStream(); + int BUF_SIZE = 4096; + byte[] buffer = new byte[BUF_SIZE]; + try { + for(int r = -1; (r = is.read(buffer, 0, BUF_SIZE)) != -1;) { + os.write(buffer, 0, r); + } + } finally { + is.close(); + } } - } catch (ParseException e) { - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, - e.getMessage()); - } catch (DatabaseException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, - e.getMessage()); } return true; } Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java (from rev 2284, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/RemoteResourceFileLocationDB.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -0,0 +1,427 @@ +/* FileLocationDBClient + * + * $Id$ + * + * Created on 5:59:49 PM Aug 21, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. 
+ * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Arrays; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpStatus; +import org.apache.commons.httpclient.NameValuePair; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.methods.PostMethod; +import org.apache.commons.httpclient.util.ParameterFormatter; +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBServlet; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.WrappedCloseableIterator; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class RemoteResourceFileLocationDB implements ResourceFileLocationDB { + private static final Logger LOGGER = Logger.getLogger(RemoteResourceFileLocationDB + .class.getName()); + + private final static String ARC_SUFFIX = ".arc"; + private final static String ARC_GZ_SUFFIX = ".arc.gz"; + private final static String WARC_SUFFIX = ".warc"; + private final static String WARC_GZ_SUFFIX = ".warc.gz"; + 
private final static String OK_RESPONSE_PREFIX = "OK "; + private HttpClient client = null; + + private String serverUrl = null; + + /** + * @param serverUrl + */ + public RemoteResourceFileLocationDB(final String serverUrl) { + super(); + this.serverUrl = serverUrl; + this.client = new HttpClient(); + } + + /** + * @return long value representing the current end "mark" of the db log + * @throws IOException + */ + public long getCurrentMark() throws IOException { + NameValuePair[] args = { + new NameValuePair( + ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + ResourceFileLocationDBServlet.GETMARK_OPERATION), + }; + return Long.parseLong(doGetMethod(args)); + } + + /** + * @param start + * @param end + * @return Iterator of file names between marks start and end + * @throws IOException + */ + public CloseableIterator<String> getNamesBetweenMarks(long start, long end) + throws IOException { + NameValuePair[] args = { + new NameValuePair( + ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + ResourceFileLocationDBServlet.GETRANGE_OPERATION), + new NameValuePair( + ResourceFileLocationDBServlet.START_ARGUMENT, + String.valueOf(start)), + new NameValuePair( + ResourceFileLocationDBServlet.END_ARGUMENT, + String.valueOf(end)) + }; + return new WrappedCloseableIterator<String>( + Arrays.asList(doGetMethod(args).split("\n")).iterator()); + } + + /** + * return an array of String URLs for all known locations of the file + * in the DB. 
+ * @param name + * @return String[] of URLs to arcName + * @throws IOException + */ + public String[] nameToUrls(final String name) throws IOException { + + NameValuePair[] args = { + new NameValuePair( + ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + ResourceFileLocationDBServlet.LOOKUP_OPERATION), + + new NameValuePair( + ResourceFileLocationDBServlet.NAME_ARGUMENT, + name) + }; + String locations = doGetMethod(args); + if(locations != null) { + return locations.split("\n"); + } + return null; + } + + + /** + * add an Url location for an arcName, unless it already exists + * @param name + * @param url + * @throws IOException + */ + public void addNameUrl(final String name, final String url) + throws IOException { + doPostMethod(ResourceFileLocationDBServlet.ADD_OPERATION, name, url); + } + + /** + * remove a single url location for a name, if it exists + * @param name + * @param url + * @throws IOException + */ + public void removeNameUrl(final String name, final String url) + throws IOException { + doPostMethod(ResourceFileLocationDBServlet.REMOVE_OPERATION, name, url); + } + + private String doGetMethod(NameValuePair[] data) throws IOException { + ParameterFormatter formatter = new ParameterFormatter(); + formatter.setAlwaysUseQuotes(false); + StringBuilder finalUrl = new StringBuilder(serverUrl); + if(data.length > 0) { + finalUrl.append("?"); + } + for(int i = 0; i < data.length; i++) { + if(i == 0) { + finalUrl.append("?"); + } else { + finalUrl.append("&"); + } + finalUrl.append(formatter.format(data[i])); + } + + GetMethod method = new GetMethod(finalUrl.toString()); + + int statusCode = client.executeMethod(method); + if (statusCode != HttpStatus.SC_OK) { + throw new IOException("Method failed: " + method.getStatusLine()); + } + String responseString = method.getResponseBodyAsString(); + if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { + if(responseString.startsWith(ResourceFileLocationDBServlet.NO_LOCATION_PREFIX)) { + return null; + } + throw 
new IOException(responseString); + } + return responseString.substring(OK_RESPONSE_PREFIX.length()+1); + } + + private void doPostMethod(final String operation, final String arcName, + final String arcUrl) + throws IOException { + PostMethod method = new PostMethod(serverUrl); + NameValuePair[] data = { + new NameValuePair(ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + operation), + new NameValuePair(ResourceFileLocationDBServlet.NAME_ARGUMENT, + arcName), + new NameValuePair(ResourceFileLocationDBServlet.URL_ARGUMENT, + arcUrl) + }; + method.setRequestBody(data); + int statusCode = client.executeMethod(method); + if (statusCode != HttpStatus.SC_OK) { + throw new IOException("Method failed: " + method.getStatusLine()); + } + String responseString = method.getResponseBodyAsString(); + if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { + throw new IOException(responseString); + } + } + + /* (non-Javadoc) + * @see org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB#shutdown() + */ + public void shutdown() throws IOException { + // NO-OP + } + + private static void USAGE(String message) { + System.err.print("USAGE: " + message + "\n" + + "\t[lookup|add|remove|sync] ...\n" + + "\n" + + "\t lookup LOCATION-DB-URL ARC\n" + + "\t\temit all known URLs for arc ARC\n" + + "\n" + + "\t add LOCATION-DB-URL ARC URL\n" + + "\t\tinform locationDB that ARC is located at URL\n" + + "\n" + + "\t remove LOCATION-DB-URL ARC URL\n" + + "\t\tremove reference to ARC at URL in locationDB\n" + + "\n" + + "\t sync LOCATION-DB-URL DIR DIR-URL\n" + + "\t\tscan directory DIR, and submit all ARC files therein\n" + + "\t\tto locationDB at url DIR-URL/ARC\n" + + "\n" + + "\t get-mark LOCATION-DB-URL\n" + + "\t\temit an identifier for the current marker in the \n" + + "\t\tlocationDB log. 
These identifiers can be used with the\n" + + "\t\tmark-range operation.\n" + + "\n" + + "\t mark-range LOCATION-DB-URL START END\n" + + "\t\temit to STDOUT one line with the name of all ARC files\n" + + "\t\tadded to the locationDB between marks START and END\n" + + "\n" + + "\t add-stream LOCATION-DB-URL\n" + + "\t\tread lines from STDIN formatted like:\n" + + "\t\t\tNAME<SPACE>URL\n" + + "\t\tand for each line, inform locationDB that file NAME is\n" + + "\t\tlocated at URL\n" + ); + System.exit(2); + } + + /** + * @param args + */ + public static void main(String[] args) { + if(args.length < 2) { + USAGE(""); + System.exit(1); + } + String operation = args[0]; + String dbUrl = args[1]; + if(!dbUrl.startsWith("http://")) { + USAGE("URL argument 1 must begin with http://"); + } + + RemoteResourceFileLocationDB locationClient = + new RemoteResourceFileLocationDB(dbUrl); + + if(operation.equalsIgnoreCase("add-stream")) { + BufferedReader r = new BufferedReader( + new InputStreamReader(System.in)); + String line; + try { + while((line = r.readLine()) != null) { + String parts[] = line.split(" "); + if(parts.length != 2) { + System.err.println("Bad input(" + line + ")"); + System.exit(2); + } + locationClient.addNameUrl(parts[0],parts[1]); + System.out.println("Added\t" + parts[0] + "\t" + parts[1]); + } + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } + + } else { + if(args.length < 3) { + USAGE(""); + System.exit(1); + } + String name = args[2]; + if(operation.equalsIgnoreCase("lookup")) { + if(args.length < 3) { + USAGE("lookup LOCATION-URL ARC"); + } + try { + String[] locations = locationClient.nameToUrls(name); + if(locations == null) { + System.err.println("No locations for " + name); + System.exit(1); + } + for(int i=0; i <locations.length; i++) { + System.out.println(locations[i]); + } + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("get-mark")) { + 
if(args.length != 2) { + USAGE("get-mark LOCATION-URL"); + } + try { + long mark = locationClient.getCurrentMark(); + System.out.println(mark); + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("mark-range")) { + if(args.length != 4) { + USAGE("mark-range LOCATION-URL START END"); + } + long start = Long.parseLong(args[3]); + long end = Long.parseLong(args[4]); + try { + Iterator<String> it = + locationClient.getNamesBetweenMarks(start,end); + while(it.hasNext()) { + String next = (String) it.next(); + System.out.println(next); + } + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + + } else if(operation.equalsIgnoreCase("add")) { + if(args.length != 4) { + USAGE("add LOCATION-URL ARC ARC-URL"); + } + String url = args[3]; + if(!url.startsWith("http://")) { + USAGE("ARC-URL argument 4 must begin with http://"); + } + try { + locationClient.addNameUrl(name,url); + System.out.println("OK"); + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("remove")) { + + if(args.length != 4) { + USAGE("remove LOCATION-URL FILE-NAME FILE-URL"); + } + String url = args[3]; + if(!url.startsWith("http://")) { + USAGE("URL argument 4 must begin with http://"); + } + try { + locationClient.removeNameUrl(name,url); + System.out.println("OK"); + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("sync")) { + + if(args.length != 4) { + USAGE("sync LOCATION-URL DIR DIR-URL"); + } + File dir = new File(name); + String dirUrl = args[3]; + if(!dirUrl.startsWith("http://")) { + USAGE("DIR-URL argument 4 must begin with http://"); + } + try { + if(!dir.isDirectory()) { + USAGE("DIR " + name + " is not a directory"); + } + + FileFilter filter = new FileFilter() { + public boolean accept(File daFile) { + return daFile.isFile() && 
+ (daFile.getName().endsWith(ARC_SUFFIX) || + daFile.getName().endsWith(ARC_GZ_SUFFIX) || + daFile.getName().endsWith(WARC_SUFFIX) || + daFile.getName().endsWith(WARC_GZ_SUFFIX)); + } + }; + + File[] files = dir.listFiles(filter); + if(files == null) { + throw new IOException("Directory " + dir.getAbsolutePath() + + " is not a directory or had an IO error"); + } + for(int i = 0; i < files.length; i++) { + File file = files[i]; + String fileName = file.getName(); + String fileUrl = dirUrl + fileName; + LOGGER.info("Adding location " + fileUrl + + " for file " + fileName); + locationClient.addNameUrl(fileName,fileUrl); + } + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else { + USAGE(" unknown operation " + operation); + } + } + } +} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java 2008-06-05 21:37:36 UTC (rev 2285) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -1,305 +0,0 @@ -/* FileLocationDB - * - * $Id$ - * - * Created on 3:08:59 PM Aug 18, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; - -import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.util.CloseableIterator; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ResourceFileLocationDB extends BDBRecordSet { - - /** - * String id for implementation class of FileLocationDBs. - */ - public static final String FILE_LOCATION_DB_CLASS = "filelocationdb"; - - protected static final String ARC_DB_PATH = "filelocationdb.path"; - - protected static final String ARC_DB_NAME = "filelocationdb.name"; - - protected static final String ARC_DB_LOG = "filelocationdb.logpath"; - - private final static String urlDelimiter = " "; - - private final static String urlDelimiterRE = " "; - - private ResourceFileLocationDBLog log; - private String logPath = null; - private String bdbPath = null; - private String bdbName = null; - - /** - * Constructor - */ - public ResourceFileLocationDB() { - super(); - } - - /** - * @throws DatabaseException - * @throws ConfigurationException - */ - public void init() throws DatabaseException, ConfigurationException { - if(logPath == null) { - throw new ConfigurationException("No logPath"); - } - log = new ResourceFileLocationDBLog(logPath); - initializeDB(bdbPath,bdbName); - } - - /** - * return an array of String URLs for all known locations of the ARC file - * in the DB. 
- * @param arcName - * @return String[] of URLs to arcName - * @throws DatabaseException - */ - public String[] arcToUrls(final String arcName) throws DatabaseException { - - String[] arcUrls = null; - String valueString = get(arcName); - if(valueString != null && valueString.length() > 0) { - arcUrls = valueString.split(urlDelimiterRE); - } - return arcUrls; - } - - /** - * add an Url location for an arcName, unless it already exists - * @param arcName - * @param arcUrl - * @throws DatabaseException - * @throws IOException - */ - public void addArcUrl(final String arcName, final String arcUrl) throws DatabaseException, IOException { - - // need to first see if there is already an entry for this arcName. - // if not, add arcUrl as the value. - // if so, check the current arcUrl locations for arcName - // if arcUrl exists, do nothing - // if arcUrl does not exist, add, and set that as the value. - - String newValue = null; - String oldValue = get(arcName); - if(oldValue != null && oldValue.length() > 0) { - String curUrls[] = oldValue.split(urlDelimiterRE); - boolean found = false; - for(int i=0; i < curUrls.length; i++) { - if(arcUrl.equals(curUrls[i])) { - found = true; - break; - } - } - if(found == false) { - newValue = oldValue + " " + arcUrl; - } - } else { - // null or empty value - newValue = arcUrl; - if(oldValue == null) log.addArc(arcName); - } - - // did we find a value? - if(newValue != null) { - put(arcName,newValue); - } - } - - /** - * remove a single Url location for an arcName, if it exists - * @param arcName - * @param arcUrl - * @throws DatabaseException - */ - public void removeArcUrl(final String arcName, final String arcUrl) throws DatabaseException { - // need to first see if there is already an entry for this arcName. 
- // if not, do nothing - // if so, loop thru all current arcUrl locations for arcName - // keep any that are not arcUrl - // if any locations are left, update to the new value, sans arcUrl - // if none are left, remove the entry from the db - - StringBuilder newValue = new StringBuilder(); - String oldValue = get(arcName); - if(oldValue != null && oldValue.length() > 0) { - String curUrls[] = oldValue.split(urlDelimiterRE); - - for(int i=0; i < curUrls.length; i++) { - if(!arcUrl.equals(curUrls[i])) { - if(newValue.length() > 0) { - newValue.append(urlDelimiter); - } - newValue.append(curUrls[i]); - } - } - - if(newValue.length() > 0) { - - // update - put(arcName, newValue.toString()); - - } else { - - // remove the entry: - delete(arcName); - } - } - } - - /** - * @param start - * @param end - * @return Iterator for traversing arcs between start and end. - * @throws IOException - */ - public CloseableIterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - return log.getArcsBetweenMarks(start, end); - } - - /** - * @return current "Mark" for the log. Currently, it's just the length of - * the log file. 
- */ - public long getCurrentMark() { - return log.getCurrentMark(); - } - - /** - * @return the logPath - */ - public String getLogPath() { - return logPath; - } - - /** - * @param logPath the logPath to set - */ - public void setLogPath(String logPath) { - this.logPath = logPath; - } - - /** - * @return the bdbPath - */ - public String getBdbPath() { - return bdbPath; - } - - /** - * @param bdbPath the bdbPath to set - */ - public void setBdbPath(String bdbPath) { - this.bdbPath = bdbPath; - } - - /** - * @return the bdbName - */ - public String getBdbName() { - return bdbName; - } - - /** - * @param bdbName the bdbName to set - */ - public void setBdbName(String bdbName) { - this.bdbName = bdbName; - } - private static void USAGE(String message) { - System.err.print("USAGE: " + message + "\n" + - "\tDBDIR DBNAME LOGPATH\n" + - "\n" + - "\t\tread lines from STDIN formatted like:\n" + - "\t\t\tNAME<SPACE>URL\n" + - "\t\tand for each line, add to locationDB that file NAME is\n" + - "\t\tlocated at URL. 
Use locationDB in DBDIR at DBNAME, \n" + - "\t\tcreating if it does not exist.\n" - ); - System.exit(2); - } - - /** - * @param args - */ - public static void main(String[] args) { - if(args.length != 3) { - USAGE(""); - System.exit(1); - } - String bdbPath = args[0]; - String bdbName = args[1]; - String logPath = args[2]; - ResourceFileLocationDB db = new ResourceFileLocationDB(); - db.setBdbPath(bdbPath); - db.setBdbName(bdbName); - db.setLogPath(logPath); - BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); - String line; - int exitCode = 0; - try { - db.init(); - while((line = r.readLine()) != null) { - String parts[] = line.split(" "); - if(parts.length != 2) { - System.err.println("Bad input(" + line + ")"); - System.exit(2); - } - db.addArcUrl(parts[0],parts[1]); - System.out.println("Added\t" + parts[0] + "\t" + parts[1]); - } - } catch (IOException e) { - e.printStackTrace(); - exitCode = 1; - } catch (DatabaseException e) { - e.printStackTrace(); - exitCode = 1; - } catch (ConfigurationException e) { - e.printStackTrace(); - exitCode = 1; - } finally { - try { - db.shutdownDB(); - } catch (DatabaseException e) { - e.printStackTrace(); - exitCode = 1; - } - } - System.exit(exitCode); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java 2008-06-05 21:37:36 UTC (rev 2285) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -1,416 +0,0 @@ -/* FileLocationDBClient - * - * $Id$ - * - * Created on 5:59:49 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. 
- * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileFilter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Arrays; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpStatus; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.PostMethod; -import org.apache.commons.httpclient.util.ParameterFormatter; -import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBServlet; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ResourceFileLocationDBClient { - private static final Logger LOGGER = Logger.getLogger(ResourceFileLocationDBClient - .class.getName()); - - private final static String ARC_SUFFIX = ".arc"; - private final static String ARC_GZ_SUFFIX = ".arc.gz"; - private final static String WARC_SUFFIX = ".warc"; - private final static String WARC_GZ_SUFFIX = ".warc.gz"; - private final static String OK_RESPONSE_PREFIX = "OK "; - private HttpClient client = null; - - private String 
serverUrl = null; - - /** - * @param serverUrl - */ - public ResourceFileLocationDBClient(final String serverUrl) { - super(); - this.serverUrl = serverUrl; - this.client = new HttpClient(); - } - - /** - * @return long value representing the current end "mark" of the db log - * @throws NumberFormatException - * @throws IOException - */ - public long getCurrentMark() throws NumberFormatException, IOException { - NameValuePair[] args = { - new NameValuePair( - ResourceFileLocationDBServlet.OPERATION_ARGUMENT, - ResourceFileLocationDBServlet.GETMARK_OPERATION), - }; - return Long.parseLong(doGetMethod(args)); - } - - /** - * @param start - * @param end - * @return Iterator of arc file names between marks start and end - * @throws IOException - */ - public Iterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - NameValuePair[] args = { - new NameValuePair( - ResourceFileLocationDBServlet.OPERATION_ARGUMENT, - ResourceFileLocationDBServlet.GETRANGE_OPERATION), - new NameValuePair( - ResourceFileLocationDBServlet.START_ARGUMENT, - String.valueOf(start)), - new NameValuePair( - ResourceFileLocationDBServlet.END_ARGUMENT, - String.valueOf(end)) - }; - return Arrays.asList(doGetMethod(args).split("\n")).iterator(); - } - - /** - * return an array of String URLs for all known locations of the ARC file - * in the DB. 
- * @param arcName - * @return String[] of URLs to arcName - * @throws IOException - */ - public String[] arcToUrls(final String arcName) throws IOException { - - NameValuePair[] args = { - new NameValuePair( - ResourceFileLocationDBServlet.OPERATION_ARGUMENT, - ResourceFileLocationDBServlet.LOOKUP_OPERATION), - - new NameValuePair( - ResourceFileLocationDBServlet.NAME_ARGUMENT, - arcName) - }; - String locations = doGetMethod(args); - if(locations != null) { - return locations.split("\n"); - } - return null; - } - - - /** - * add an Url location for an arcName, unless it already exists - * @param arcName - * @param arcUrl - * @throws IOException - */ - public void addArcUrl(final String arcName, final String arcUrl) - throws IOException { - doPostMethod(ResourceFileLocationDBServlet.ADD_OPERATION, arcName, arcUrl); - } - - /** - * remove a single Url location for an arcName, if it exists - * @param arcName - * @param arcUrl - * @throws IOException - */ - public void removeArcUrl(final String arcName, final String arcUrl) - throws IOException { - doPostMethod(ResourceFileLocationDBServlet.REMOVE_OPERATION, arcName, arcUrl); - } - - private String doGetMethod(NameValuePair[] data) throws IOException { - ParameterFormatter formatter = new ParameterFormatter(); - formatter.setAlwaysUseQuotes(false); - StringBuilder finalUrl = new StringBuilder(serverUrl); - if(data.length > 0) { - finalUrl.append("?"); - } - for(int i = 0; i < data.length; i++) { - if(i == 0) { - finalUrl.append("?"); - } else { - finalUrl.append("&"); - } - finalUrl.append(formatter.format(data[i])); - } - - GetMethod method = new GetMethod(finalUrl.toString()); - - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine()); - } - String responseString = method.getResponseBodyAsString(); - if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { - 
if(responseString.startsWith(ResourceFileLocationDBServlet.NO_LOCATION_PREFIX)) { - return null; - } - throw new IOException(responseString); - } - return responseString.substring(OK_RESPONSE_PREFIX.length()+1); - } - - private void doPostMethod(final String operation, final String arcName, - final String arcUrl) - throws IOException { - PostMethod method = new PostMethod(serverUrl); - NameValuePair[] data = { - new NameValuePair(ResourceFileLocationDBServlet.OPERATION_ARGUMENT, - operation), - new NameValuePair(ResourceFileLocationDBServlet.NAME_ARGUMENT, - arcName), - new NameValuePair(ResourceFileLocationDBServlet.URL_ARGUMENT, - arcUrl) - }; - method.setRequestBody(data); - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine()); - } - String responseString = method.getResponseBodyAsString(); - if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { - throw new IOException(responseString); - } - } - - private static void USAGE(String message) { - System.err.print("USAGE: " + message + "\n" + - "\t[lookup|add|remove|sync] ...\n" + - "\n" + - "\t lookup LOCATION-DB-URL ARC\n" + - "\t\temit all known URLs for arc ARC\n" + - "\n" + - "\t add LOCATION-DB-URL ARC URL\n" + - "\t\tinform locationDB that ARC is located at URL\n" + - "\n" + - "\t remove LOCATION-DB-URL ARC URL\n" + - "\t\tremove reference to ARC at URL in locationDB\n" + - "\n" + - "\t sync LOCATION-DB-URL DIR DIR-URL\n" + - "\t\tscan directory DIR, and submit all ARC files therein\n" + - "\t\tto locationDB at url DIR-URL/ARC\n" + - "\n" + - "\t get-mark LOCATION-DB-URL\n" + - "\t\temit an identifier for the current marker in the \n" + - "\t\tlocationDB log. 
These identifiers can be used with the\n" + - "\t\tmark-range operation.\n" + - "\n" + - "\t mark-range LOCATION-DB-URL START END\n" + - "\t\temit to STDOUT one line with the name of all ARC files\n" + - "\t\tadded to the locationDB between marks START and END\n" + - "\n" + - "\t add-stream LOCATION-DB-URL\n" + - "\t\tread lines from STDIN formatted like:\n" + - "\t\t\tNAME<SPACE>URL\n" + - "\t\tand for each line, inform locationDB that file NAME is\n" + - "\t\tlocated at URL\n" - ); - System.exit(2); - } - - /** - * @param args - */ - public static void main(String[] args) { - if(args.length < 2) { - USAGE(""); - System.exit(1); - } - String operation = args[0]; - String url = args[1]; - if(!url.startsWith("http://")) { - USAGE("URL argument 1 must begin with http://"); - } - - ResourceFileLocationDBClient locationClient = new ResourceFileLocationDBClient(url); - - if(operation.equalsIgnoreCase("add-stream")) { - BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); - String line; - try { - while((line = r.readLine()) != null) { - String parts[] = line.split(" "); - if(parts.length != 2) { - System.err.println("Bad input(" + line + ")"); - System.exit(2); - } - locationClient.addArcUrl(parts[0],parts[1]); - System.out.println("Added\t" + parts[0] + "\t" + parts[1]); - } - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - - } else { - if(args.length < 3) { - USAGE(""); - System.exit(1); - } - String arc = args[2]; - if(operation.equalsIgnoreCase("lookup")) { - if(args.length < 3) { - USAGE("lookup LOCATION-URL ARC"); - } - try { - String[] locations = locationClient.arcToUrls(arc); - if(locations == null) { - System.err.println("No locations for " + arc); - System.exit(1); - } - for(int i=0; i <locations.length; i++) { - System.out.println(locations[i]); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("get-mark")) { - if(args.length != 
2) { - USAGE("get-mark LOCATION-URL"); - } - try { - long mark = locationClient.getCurrentMark(); - System.out.println(mark); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("mark-range")) { - if(args.length != 4) { - USAGE("mark-range LOCATION-URL START END"); - } - long start = Long.parseLong(args[3]); - long end = Long.parseLong(args[4]); - try { - Iterator<String> it = - locationClient.getArcsBetweenMarks(start,end); - while(it.hasNext()) { - String next = (String) it.next(); - System.out.println(next); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - - } else if(operation.equalsIgnoreCase("add")) { - if(args.length != 4) { - USAGE("add LOCATION-URL ARC ARC-URL"); - } - String arcUrl = args[3]; - if(!arcUrl.startsWith("http://")) { - USAGE("ARC-URL argument 4 must begin with http://"); - } - try { - locationClient.addArcUrl(arc,arcUrl); - System.out.println("OK"); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("remove")) { - - if(args.length != 4) { - USAGE("remove LOCATION-URL ARC ARC-URL"); - } - String arcUrl = args[3]; - if(!arcUrl.startsWith("http://")) { - USAGE("ARC-URL argument 4 must begin with http://"); - } - try { - locationClient.removeArcUrl(arc,arcUrl); - System.out.println("OK"); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("sync")) { - - if(args.length != 4) { - USAGE("sync LOCATION-URL DIR DIR-URL"); - } - File dir = new File(arc); - String dirUrl = args[3]; - if(!dirUrl.startsWith("http://")) { - USAGE("DIR-URL argument 4 must begin with http://"); - } - try { - if(!dir.isDirectory()) { - USAGE("DIR " + arc + " is not a directory"); - } - - FileFilter filter = new FileFilter() { - public boolean accept(File daFile) { - return daFile.isFile() && - 
(daFile.getName().endsWith(ARC_SUFFIX) || - daFile.getName().endsWith(ARC_GZ_SUFFIX) || - daFile.getName().endsWith(WARC_SUFFIX) || - daFile.getName().endsWith(WARC_GZ_SUFFIX)); - } - }; - - File[] files = dir.listFiles(filter); - if(files == null) { - throw new IOException("Directory " + dir.getAbsolutePath() + - " is not a directory or had an IO error"); - } - for(int i = 0; i < files.length; i++) { - File file = files[i]; - String name = file.getName(); - String fileUrl = dirUrl + name; - LOGGER.info("Adding location " + fileUrl + " for file " + name); - locationClient.addArcUrl(name,fileUrl); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else { - USAGE(" unknown operation " + operation); - } - } - } -} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2008-06-05 21:37:36 UTC (rev 2285) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -38,7 +38,10 @@ import sun.reflect.generics.reflectiveObjects.NotImplementedException; /** + * Simple log file tracking new names being added to a ResourceFileLocationDB. * + * Also supports returning an iterator of Strings to a byte range of the log, to + * simplify tracking deltas to the DB. 
* * @author brad * @version $Date$, $Revision$ Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java 2008-06-05 21:37:36 UTC (rev 2285) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java 2008-06-05 21:52:54 UTC (rev 2286) @@ -34,14 +34,13 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; import org.archive.wayback.webapp.ServletRequestContext; -import com.sleepycat.je.DatabaseException; - /** + * ServletRequestContext enabling remote HTTP GET/POST access to a local + * ResourceFileLocationDB. See RemoteResourceFileLocationDB for the client + * class implemented against this.
* - * * @author brad * @version $Date$, $Revision$ */ @@ -68,9 +67,8 @@ @SuppressWarnings("unchecked") Map<String,String[]> queryMap = httpRequest.getParameterMap(); String message; - ResourceFileLocationDB locationDB = getLocationDB(); try { - message = handleOperation(locationDB,queryMap); + message = handleOperation(queryMap); httpResponse.setStatus(HttpServletResponse.SC_OK); httpResponse.setContentType("text/plain"); OutputStream os = httpResponse.getOutputStream(); @@ -83,18 +81,17 @@ return true; } - private String handleOperation(ResourceFileLocationDB locationDB, - Map<String,String[]> queryMap) + private String handleOperation(Map<String,String[]> queryMap) throws ParseException { String operation = getRequiredMapParam(queryMap, OPERATION_ARGUMENT); String message; try { if (operation.equals(LOOKUP_OPERATION)) { - String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); + String name = getRequiredMapParam(queryMap, NAME_ARGUMENT); - message = NO_LOCATION_PREFIX + " " + arcName; - String arcUrls[] = locationDB.arcToUrls(arcName); + message = NO_LOCATION_PREFIX + " " + name; + String arcUrls[] = locationDB.nameToUrls(name); if (arcUrls != null && arcUrls.length > 0) { StringBuffer buf = new StringBuffer("OK "); for (int i = 0; i < arcUrls.length; i++) { @@ -112,7 +109,7 @@ long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT)); long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT)); - Iterator<String> itr = locationDB.getArcsBetweenMarks(start,end); + Iterator<String> itr = locationDB.getNamesBetweenMarks(start,end); StringBuilder str = new StringBuilder(); str.append("OK "); while(itr.hasNext()) { @@ -123,17 +120,17 @@ } else { - String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); - String arcUrl = getRequiredMapParam(queryMap, URL_ARGUMENT); + String name = getRequiredMapParam(queryMap, NAME_ARGUMENT); + String url = getRequiredMapParam(queryMap, URL_ARGUMENT); if (operation.equals(ADD_OPERATION)) { - 
locationDB.addArcUrl(arcName, arcUrl); - message = "OK added url " + arcUrl + " for " + arcName; + locationDB.addNameUrl(name, url); + message = "OK added url " + url + " for " + name; } else if (operation.equals(REMOVE_OPERATION)) { - getLocationDB().removeArcUrl(arcName, arcUrl); - message = "OK removed url " + arcUrl + " for " + arcName; + locationDB.removeNameUrl(name, url); + message = "OK removed url " + url + " for " + name; } else { @@ -143,11 +140,7 @@ } } - } catch (DatabaseException e) { - e.printStackTrace(); - message = e.getMessage(); } catch (IOException e) { - // TODO Auto-generated catch block e.printStackTrace(); message = e.getMessage(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2285 http://archive-access.svn.sourceforge.net/archive-access/?rev=2285&view=rev Author: bradtofel Date: 2008-06-05 14:37:36 -0700 (Thu, 05 Jun 2008) Log Message: ----------- INITIAL REV: wrapper allowing normal Iterators to be used as CloseableIterators Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WrappedCloseableIterator.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WrappedCloseableIterator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WrappedCloseableIterator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/WrappedCloseableIterator.java 2008-06-05 21:37:36 UTC (rev 2285) @@ -0,0 +1,63 @@ +/* WrappedCloseableIterator + * + * $Id$ + * + * Created on 2:16:56 PM Jun 5, 2008. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.util; + +import java.io.IOException; +import java.util.Iterator; + +/** + * Simple wrapper around a normal Iterator which allows it to be used as a CloseableIterator. 
+ * + * @author brad + * @version $Date$, $Revision$ + */ +public class WrappedCloseableIterator<E> implements CloseableIterator<E> { + + private Iterator<E> inner = null; + + public WrappedCloseableIterator(Iterator<E> inner) { + this.inner = inner; + } + + public boolean hasNext() { + return inner.hasNext(); + } + + public E next() { + return inner.next(); + } + + public void remove() { + inner.remove(); + } + + /* (non-Javadoc) + * @see java.io.Closeable#close() + */ + public void close() throws IOException { + // NO-OP + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-05 20:42:44
|
Revision: 2284 http://archive-access.svn.sourceforge.net/archive-access/?rev=2284&view=rev Author: bradtofel Date: 2008-06-05 13:42:53 -0700 (Thu, 05 Jun 2008) Log Message: ----------- RENAMED classes Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBServlet.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java 2008-06-05 20:39:13 UTC (rev 2283) 
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -1,124 +0,0 @@ -/* ArcProxyServlet - * - * $Id$ - * - * Created on 6:19:54 PM Aug 10, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URL; -import java.net.URLConnection; -import java.text.ParseException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.resourcestore.locationdb.FileLocationDB; -import org.archive.wayback.webapp.ServletRequestContext; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ArcProxyServlet extends ServletRequestContext { - - private static final String RANGE_HTTP_HEADER = "Range"; - private static final String CONTENT_TYPE_HEADER = "Content-Type"; - private static final String CONTENT_TYPE = "application/x-gzip"; - /** - * - */ - private static final long serialVersionUID = 1L; - private 
FileLocationDB locationDB = null; - - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws IOException, - ServletException { - - try { - String arc = httpRequest.getRequestURI(); - arc = arc.substring(arc.lastIndexOf('/')+1); - if(arc.length() == 0) { - throw new ParseException("no/invalid arc",0); - } - String urls[] = locationDB.arcToUrls(arc); - if(urls == null || urls.length == 0) { - throw new DatabaseException("Unable to locate("+arc+")"); - } - String urlString = urls[0]; - String rangeHeader = httpRequest.getHeader(RANGE_HTTP_HEADER); - URL url = new URL(urlString); - URLConnection conn = url.openConnection(); - if(rangeHeader != null) { - conn.addRequestProperty(RANGE_HTTP_HEADER,rangeHeader); - } - InputStream is = conn.getInputStream(); - httpResponse.setStatus(HttpServletResponse.SC_OK); - String typeHeader = conn.getHeaderField(CONTENT_TYPE_HEADER); - if(typeHeader == null) { - typeHeader = CONTENT_TYPE; - } - httpResponse.setContentType(typeHeader); - OutputStream os = httpResponse.getOutputStream(); - int BUF_SIZE = 4096; - byte[] buffer = new byte[BUF_SIZE]; - try { - for (int r = -1; (r = is.read(buffer, 0, BUF_SIZE)) != -1;) { - os.write(buffer, 0, r); - } - } finally { - is.close(); - } - } catch (ParseException e) { - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, - e.getMessage()); - } catch (DatabaseException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, - e.getMessage()); - } - return true; - } - - /** - * @return the locationDB - */ - public FileLocationDB getLocationDB() { - return locationDB; - } - - /** - * @param locationDB the locationDB to set - */ - public void setLocationDB(FileLocationDB locationDB) { - this.locationDB = locationDB; - } -} Deleted: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java 2008-06-05 20:39:13 UTC (rev 2283) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -1,305 +0,0 @@ -/* FileLocationDB - * - * $Id$ - * - * Created on 3:08:59 PM Aug 18, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; - -import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.util.CloseableIterator; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDB extends BDBRecordSet { - - /** - * String id for implementation class of FileLocationDBs. 
- */ - public static final String FILE_LOCATION_DB_CLASS = "filelocationdb"; - - protected static final String ARC_DB_PATH = "filelocationdb.path"; - - protected static final String ARC_DB_NAME = "filelocationdb.name"; - - protected static final String ARC_DB_LOG = "filelocationdb.logpath"; - - private final static String urlDelimiter = " "; - - private final static String urlDelimiterRE = " "; - - private FileLocationDBLog log; - private String logPath = null; - private String bdbPath = null; - private String bdbName = null; - - /** - * Constructor - */ - public FileLocationDB() { - super(); - } - - /** - * @throws DatabaseException - * @throws ConfigurationException - */ - public void init() throws DatabaseException, ConfigurationException { - if(logPath == null) { - throw new ConfigurationException("No logPath"); - } - log = new FileLocationDBLog(logPath); - initializeDB(bdbPath,bdbName); - } - - /** - * return an array of String URLs for all known locations of the ARC file - * in the DB. - * @param arcName - * @return String[] of URLs to arcName - * @throws DatabaseException - */ - public String[] arcToUrls(final String arcName) throws DatabaseException { - - String[] arcUrls = null; - String valueString = get(arcName); - if(valueString != null && valueString.length() > 0) { - arcUrls = valueString.split(urlDelimiterRE); - } - return arcUrls; - } - - /** - * add an Url location for an arcName, unless it already exists - * @param arcName - * @param arcUrl - * @throws DatabaseException - * @throws IOException - */ - public void addArcUrl(final String arcName, final String arcUrl) throws DatabaseException, IOException { - - // need to first see if there is already an entry for this arcName. - // if not, add arcUrl as the value. - // if so, check the current arcUrl locations for arcName - // if arcUrl exists, do nothing - // if arcUrl does not exist, add, and set that as the value. 
- - String newValue = null; - String oldValue = get(arcName); - if(oldValue != null && oldValue.length() > 0) { - String curUrls[] = oldValue.split(urlDelimiterRE); - boolean found = false; - for(int i=0; i < curUrls.length; i++) { - if(arcUrl.equals(curUrls[i])) { - found = true; - break; - } - } - if(found == false) { - newValue = oldValue + " " + arcUrl; - } - } else { - // null or empty value - newValue = arcUrl; - if(oldValue == null) log.addArc(arcName); - } - - // did we find a value? - if(newValue != null) { - put(arcName,newValue); - } - } - - /** - * remove a single Url location for an arcName, if it exists - * @param arcName - * @param arcUrl - * @throws DatabaseException - */ - public void removeArcUrl(final String arcName, final String arcUrl) throws DatabaseException { - // need to first see if there is already an entry for this arcName. - // if not, do nothing - // if so, loop thru all current arcUrl locations for arcName - // keep any that are not arcUrl - // if any locations are left, update to the new value, sans arcUrl - // if none are left, remove the entry from the db - - StringBuilder newValue = new StringBuilder(); - String oldValue = get(arcName); - if(oldValue != null && oldValue.length() > 0) { - String curUrls[] = oldValue.split(urlDelimiterRE); - - for(int i=0; i < curUrls.length; i++) { - if(!arcUrl.equals(curUrls[i])) { - if(newValue.length() > 0) { - newValue.append(urlDelimiter); - } - newValue.append(curUrls[i]); - } - } - - if(newValue.length() > 0) { - - // update - put(arcName, newValue.toString()); - - } else { - - // remove the entry: - delete(arcName); - } - } - } - - /** - * @param start - * @param end - * @return Iterator for traversing arcs between start and end. - * @throws IOException - */ - public CloseableIterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - return log.getArcsBetweenMarks(start, end); - } - - /** - * @return current "Mark" for the log. 
Currently, it's just the length of - * the log file. - */ - public long getCurrentMark() { - return log.getCurrentMark(); - } - - /** - * @return the logPath - */ - public String getLogPath() { - return logPath; - } - - /** - * @param logPath the logPath to set - */ - public void setLogPath(String logPath) { - this.logPath = logPath; - } - - /** - * @return the bdbPath - */ - public String getBdbPath() { - return bdbPath; - } - - /** - * @param bdbPath the bdbPath to set - */ - public void setBdbPath(String bdbPath) { - this.bdbPath = bdbPath; - } - - /** - * @return the bdbName - */ - public String getBdbName() { - return bdbName; - } - - /** - * @param bdbName the bdbName to set - */ - public void setBdbName(String bdbName) { - this.bdbName = bdbName; - } - private static void USAGE(String message) { - System.err.print("USAGE: " + message + "\n" + - "\tDBDIR DBNAME LOGPATH\n" + - "\n" + - "\t\tread lines from STDIN formatted like:\n" + - "\t\t\tNAME<SPACE>URL\n" + - "\t\tand for each line, add to locationDB that file NAME is\n" + - "\t\tlocated at URL. 
Use locationDB in DBDIR at DBNAME, \n" + - "\t\tcreating if it does not exist.\n" - ); - System.exit(2); - } - - /** - * @param args - */ - public static void main(String[] args) { - if(args.length != 3) { - USAGE(""); - System.exit(1); - } - String bdbPath = args[0]; - String bdbName = args[1]; - String logPath = args[2]; - FileLocationDB db = new FileLocationDB(); - db.setBdbPath(bdbPath); - db.setBdbName(bdbName); - db.setLogPath(logPath); - BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); - String line; - int exitCode = 0; - try { - db.init(); - while((line = r.readLine()) != null) { - String parts[] = line.split(" "); - if(parts.length != 2) { - System.err.println("Bad input(" + line + ")"); - System.exit(2); - } - db.addArcUrl(parts[0],parts[1]); - System.out.println("Added\t" + parts[0] + "\t" + parts[1]); - } - } catch (IOException e) { - e.printStackTrace(); - exitCode = 1; - } catch (DatabaseException e) { - e.printStackTrace(); - exitCode = 1; - } catch (ConfigurationException e) { - e.printStackTrace(); - exitCode = 1; - } finally { - try { - db.shutdownDB(); - } catch (DatabaseException e) { - e.printStackTrace(); - exitCode = 1; - } - } - System.exit(exitCode); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java 2008-06-05 20:39:13 UTC (rev 2283) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -1,416 +0,0 @@ -/* FileLocationDBClient - * - * $Id$ - * - * Created on 5:59:49 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. 
- * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileFilter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Arrays; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpStatus; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.PostMethod; -import org.apache.commons.httpclient.util.ParameterFormatter; -import org.archive.wayback.resourcestore.locationdb.FileLocationDBServlet; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBClient { - private static final Logger LOGGER = Logger.getLogger(FileLocationDBClient - .class.getName()); - - private final static String ARC_SUFFIX = ".arc"; - private final static String ARC_GZ_SUFFIX = ".arc.gz"; - private final static String WARC_SUFFIX = ".warc"; - private final static String WARC_GZ_SUFFIX = ".warc.gz"; - private final static String OK_RESPONSE_PREFIX = "OK "; - private HttpClient client = null; - - private String serverUrl = null; - - /** - * @param serverUrl - */ - public 
FileLocationDBClient(final String serverUrl) { - super(); - this.serverUrl = serverUrl; - this.client = new HttpClient(); - } - - /** - * @return long value representing the current end "mark" of the db log - * @throws NumberFormatException - * @throws IOException - */ - public long getCurrentMark() throws NumberFormatException, IOException { - NameValuePair[] args = { - new NameValuePair( - FileLocationDBServlet.OPERATION_ARGUMENT, - FileLocationDBServlet.GETMARK_OPERATION), - }; - return Long.parseLong(doGetMethod(args)); - } - - /** - * @param start - * @param end - * @return Iterator of arc file names between marks start and end - * @throws IOException - */ - public Iterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - NameValuePair[] args = { - new NameValuePair( - FileLocationDBServlet.OPERATION_ARGUMENT, - FileLocationDBServlet.GETRANGE_OPERATION), - new NameValuePair( - FileLocationDBServlet.START_ARGUMENT, - String.valueOf(start)), - new NameValuePair( - FileLocationDBServlet.END_ARGUMENT, - String.valueOf(end)) - }; - return Arrays.asList(doGetMethod(args).split("\n")).iterator(); - } - - /** - * return an array of String URLs for all known locations of the ARC file - * in the DB. 
- * @param arcName - * @return String[] of URLs to arcName - * @throws IOException - */ - public String[] arcToUrls(final String arcName) throws IOException { - - NameValuePair[] args = { - new NameValuePair( - FileLocationDBServlet.OPERATION_ARGUMENT, - FileLocationDBServlet.LOOKUP_OPERATION), - - new NameValuePair( - FileLocationDBServlet.NAME_ARGUMENT, - arcName) - }; - String locations = doGetMethod(args); - if(locations != null) { - return locations.split("\n"); - } - return null; - } - - - /** - * add an Url location for an arcName, unless it already exists - * @param arcName - * @param arcUrl - * @throws IOException - */ - public void addArcUrl(final String arcName, final String arcUrl) - throws IOException { - doPostMethod(FileLocationDBServlet.ADD_OPERATION, arcName, arcUrl); - } - - /** - * remove a single Url location for an arcName, if it exists - * @param arcName - * @param arcUrl - * @throws IOException - */ - public void removeArcUrl(final String arcName, final String arcUrl) - throws IOException { - doPostMethod(FileLocationDBServlet.REMOVE_OPERATION, arcName, arcUrl); - } - - private String doGetMethod(NameValuePair[] data) throws IOException { - ParameterFormatter formatter = new ParameterFormatter(); - formatter.setAlwaysUseQuotes(false); - StringBuilder finalUrl = new StringBuilder(serverUrl); - if(data.length > 0) { - finalUrl.append("?"); - } - for(int i = 0; i < data.length; i++) { - if(i == 0) { - finalUrl.append("?"); - } else { - finalUrl.append("&"); - } - finalUrl.append(formatter.format(data[i])); - } - - GetMethod method = new GetMethod(finalUrl.toString()); - - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine()); - } - String responseString = method.getResponseBodyAsString(); - if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { - if(responseString.startsWith(FileLocationDBServlet.NO_LOCATION_PREFIX)) { - return null; - } - throw 
new IOException(responseString); - } - return responseString.substring(OK_RESPONSE_PREFIX.length()+1); - } - - private void doPostMethod(final String operation, final String arcName, - final String arcUrl) - throws IOException { - PostMethod method = new PostMethod(serverUrl); - NameValuePair[] data = { - new NameValuePair(FileLocationDBServlet.OPERATION_ARGUMENT, - operation), - new NameValuePair(FileLocationDBServlet.NAME_ARGUMENT, - arcName), - new NameValuePair(FileLocationDBServlet.URL_ARGUMENT, - arcUrl) - }; - method.setRequestBody(data); - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine()); - } - String responseString = method.getResponseBodyAsString(); - if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { - throw new IOException(responseString); - } - } - - private static void USAGE(String message) { - System.err.print("USAGE: " + message + "\n" + - "\t[lookup|add|remove|sync] ...\n" + - "\n" + - "\t lookup LOCATION-DB-URL ARC\n" + - "\t\temit all known URLs for arc ARC\n" + - "\n" + - "\t add LOCATION-DB-URL ARC URL\n" + - "\t\tinform locationDB that ARC is located at URL\n" + - "\n" + - "\t remove LOCATION-DB-URL ARC URL\n" + - "\t\tremove reference to ARC at URL in locationDB\n" + - "\n" + - "\t sync LOCATION-DB-URL DIR DIR-URL\n" + - "\t\tscan directory DIR, and submit all ARC files therein\n" + - "\t\tto locationDB at url DIR-URL/ARC\n" + - "\n" + - "\t get-mark LOCATION-DB-URL\n" + - "\t\temit an identifier for the current marker in the \n" + - "\t\tlocationDB log. 
These identifiers can be used with the\n" + - "\t\tmark-range operation.\n" + - "\n" + - "\t mark-range LOCATION-DB-URL START END\n" + - "\t\temit to STDOUT one line with the name of all ARC files\n" + - "\t\tadded to the locationDB between marks START and END\n" + - "\n" + - "\t add-stream LOCATION-DB-URL\n" + - "\t\tread lines from STDIN formatted like:\n" + - "\t\t\tNAME<SPACE>URL\n" + - "\t\tand for each line, inform locationDB that file NAME is\n" + - "\t\tlocated at URL\n" - ); - System.exit(2); - } - - /** - * @param args - */ - public static void main(String[] args) { - if(args.length < 2) { - USAGE(""); - System.exit(1); - } - String operation = args[0]; - String url = args[1]; - if(!url.startsWith("http://")) { - USAGE("URL argument 1 must begin with http://"); - } - - FileLocationDBClient locationClient = new FileLocationDBClient(url); - - if(operation.equalsIgnoreCase("add-stream")) { - BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); - String line; - try { - while((line = r.readLine()) != null) { - String parts[] = line.split(" "); - if(parts.length != 2) { - System.err.println("Bad input(" + line + ")"); - System.exit(2); - } - locationClient.addArcUrl(parts[0],parts[1]); - System.out.println("Added\t" + parts[0] + "\t" + parts[1]); - } - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - - } else { - if(args.length < 3) { - USAGE(""); - System.exit(1); - } - String arc = args[2]; - if(operation.equalsIgnoreCase("lookup")) { - if(args.length < 3) { - USAGE("lookup LOCATION-URL ARC"); - } - try { - String[] locations = locationClient.arcToUrls(arc); - if(locations == null) { - System.err.println("No locations for " + arc); - System.exit(1); - } - for(int i=0; i <locations.length; i++) { - System.out.println(locations[i]); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("get-mark")) { - if(args.length != 2) { - 
USAGE("get-mark LOCATION-URL"); - } - try { - long mark = locationClient.getCurrentMark(); - System.out.println(mark); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("mark-range")) { - if(args.length != 4) { - USAGE("mark-range LOCATION-URL START END"); - } - long start = Long.parseLong(args[3]); - long end = Long.parseLong(args[4]); - try { - Iterator<String> it = - locationClient.getArcsBetweenMarks(start,end); - while(it.hasNext()) { - String next = (String) it.next(); - System.out.println(next); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - - } else if(operation.equalsIgnoreCase("add")) { - if(args.length != 4) { - USAGE("add LOCATION-URL ARC ARC-URL"); - } - String arcUrl = args[3]; - if(!arcUrl.startsWith("http://")) { - USAGE("ARC-URL argument 4 must begin with http://"); - } - try { - locationClient.addArcUrl(arc,arcUrl); - System.out.println("OK"); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("remove")) { - - if(args.length != 4) { - USAGE("remove LOCATION-URL ARC ARC-URL"); - } - String arcUrl = args[3]; - if(!arcUrl.startsWith("http://")) { - USAGE("ARC-URL argument 4 must begin with http://"); - } - try { - locationClient.removeArcUrl(arc,arcUrl); - System.out.println("OK"); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("sync")) { - - if(args.length != 4) { - USAGE("sync LOCATION-URL DIR DIR-URL"); - } - File dir = new File(arc); - String dirUrl = args[3]; - if(!dirUrl.startsWith("http://")) { - USAGE("DIR-URL argument 4 must begin with http://"); - } - try { - if(!dir.isDirectory()) { - USAGE("DIR " + arc + " is not a directory"); - } - - FileFilter filter = new FileFilter() { - public boolean accept(File daFile) { - return daFile.isFile() && - 
(daFile.getName().endsWith(ARC_SUFFIX) || - daFile.getName().endsWith(ARC_GZ_SUFFIX) || - daFile.getName().endsWith(WARC_SUFFIX) || - daFile.getName().endsWith(WARC_GZ_SUFFIX)); - } - }; - - File[] files = dir.listFiles(filter); - if(files == null) { - throw new IOException("Directory " + dir.getAbsolutePath() + - " is not a directory or had an IO error"); - } - for(int i = 0; i < files.length; i++) { - File file = files[i]; - String name = file.getName(); - String fileUrl = dirUrl + name; - LOGGER.info("Adding location " + fileUrl + " for file " + name); - locationClient.addArcUrl(name,fileUrl); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else { - USAGE(" unknown operation " + operation); - } - } - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java 2008-06-05 20:39:13 UTC (rev 2283) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -1,173 +0,0 @@ -/* FileLocationDBLog - * - * $Id$ - * - * Created on 2:38:18 PM Aug 18, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.RandomAccessFile; - -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.util.flatfile.RecordIterator; - -import sun.reflect.generics.reflectiveObjects.NotImplementedException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBLog extends File { - - /** - * - */ - private static final long serialVersionUID = -9128222006544481378L; - - /** - * @param pathname - * @throws ConfigurationException - */ - public FileLocationDBLog(String pathname) throws ConfigurationException { - super(pathname); - if (!isFile()) { - if (exists()) { - throw new ConfigurationException("path(" + pathname - + ") exists but is not a file!"); - } - try { - if (!createNewFile()) { - throw new ConfigurationException( - "Unable to create empty file " + pathname); - } - } catch (IOException e) { - e.printStackTrace(); - throw new ConfigurationException("Unable to create empty file " - + pathname); - } - } - } - - /** - * @return long value indicating the current end position of the log - */ - public long getCurrentMark() { - return length(); - } - - /** - * @param start - * @param end - * @return CleanableIterator that returns all arcs between start and end - * @throws IOException - */ - public CloseableIterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - - RandomAccessFile raf = new RandomAccessFile(this, "r"); - raf.seek(start); - BufferedReader is = new BufferedReader(new FileReader(raf.getFD())); - return new 
BufferedRangeIterator(new RecordIterator(is),end - start); - } - - /** - * @param arcName - * @throws IOException - */ - public synchronized void addArc(String arcName) throws IOException { - FileWriter writer = new FileWriter(this, true); - writer.write(arcName + "\n"); - writer.flush(); - writer.close(); - } - - private class BufferedRangeIterator implements CloseableIterator<String> { - private RecordIterator itr; - private long bytesToSend; - private long bytesSent; - private String next; - private boolean done; - /** - * @param itr - * @param bytesToSend - */ - public BufferedRangeIterator(RecordIterator itr, long bytesToSend) { - this.itr = itr; - this.bytesToSend = bytesToSend; - bytesSent = 0; - next = null; - done = false; - } - /* (non-Javadoc) - * @see org.archive.wayback.util.CleanableIterator#clean() - */ - public void close() throws IOException { - if(done == false) { - itr.close(); - done = true; - } - } - - /* (non-Javadoc) - * @see java.util.Iterator#hasNext() - */ - public boolean hasNext() { - if(done) return false; - if(next != null) return true; - if((bytesSent >= bytesToSend) || !itr.hasNext()) { - try { - close(); - } catch (IOException e) { - // TODO This is lame. What is the right way? - throw new RuntimeException(e); - } - return false; - } - next = (String) itr.next(); - return true; - } - - /* (non-Javadoc) - * @see java.util.Iterator#next() - */ - public String next() { - String returnString = next; - next = null; - bytesSent += returnString.length() + 1; // TODO: not X-platform! 
- return returnString; - } - - /* (non-Javadoc) - * @see java.util.Iterator#remove() - */ - public void remove() { - throw new NotImplementedException(); - } - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java 2008-06-05 20:39:13 UTC (rev 2283) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -1,170 +0,0 @@ -/* FileLocationDBServlet - * - * $Id$ - * - * Created on 5:35:31 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.locationdb; - -import java.io.IOException; -import java.io.OutputStream; -import java.text.ParseException; -import java.util.Iterator; -import java.util.Map; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.resourcestore.locationdb.FileLocationDB; -import org.archive.wayback.webapp.ServletRequestContext; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBServlet extends ServletRequestContext { - - protected static final String OPERATION_ARGUMENT = "operation"; - protected static final String NAME_ARGUMENT = "name"; - protected static final String URL_ARGUMENT = "url"; - protected static final String START_ARGUMENT = "start"; - protected static final String END_ARGUMENT = "end"; - protected static final String LOOKUP_OPERATION = "lookup"; - protected static final String GETMARK_OPERATION = "getmark"; - protected static final String GETRANGE_OPERATION = "getrange"; - protected static final String ADD_OPERATION = "add"; - protected static final String REMOVE_OPERATION = "remove"; - protected static final String NO_LOCATION_PREFIX = "ERROR No locations for"; - - private static final long serialVersionUID = 1L; - private FileLocationDB locationDB = null; - - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws IOException, - ServletException { - @SuppressWarnings("unchecked") - Map<String,String[]> queryMap = httpRequest.getParameterMap(); - String message; - FileLocationDB locationDB = getLocationDB(); - try { - message = handleOperation(locationDB,queryMap); - 
httpResponse.setStatus(HttpServletResponse.SC_OK); - httpResponse.setContentType("text/plain"); - OutputStream os = httpResponse.getOutputStream(); - os.write(message.getBytes()); - } catch (ParseException e) { - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, - e.getMessage()); - } - return true; - } - - private String handleOperation(FileLocationDB locationDB, - Map<String,String[]> queryMap) - throws ParseException { - - String operation = getRequiredMapParam(queryMap, OPERATION_ARGUMENT); - String message; - try { - if (operation.equals(LOOKUP_OPERATION)) { - String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); - - message = NO_LOCATION_PREFIX + " " + arcName; - String arcUrls[] = locationDB.arcToUrls(arcName); - if (arcUrls != null && arcUrls.length > 0) { - StringBuffer buf = new StringBuffer("OK "); - for (int i = 0; i < arcUrls.length; i++) { - buf.append("\n"); - buf.append(arcUrls[i]); - } - message = buf.toString(); - } - - } else if (operation.equals(GETMARK_OPERATION)) { - - message = "OK \n" + String.valueOf(locationDB.getCurrentMark()); - - } else if (operation.equals(GETRANGE_OPERATION)) { - - long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT)); - long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT)); - Iterator<String> itr = locationDB.getArcsBetweenMarks(start,end); - StringBuilder str = new StringBuilder(); - str.append("OK "); - while(itr.hasNext()) { - str.append("\n"); - str.append((String)itr.next()); - } - message = str.toString(); - - } else { - - String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); - String arcUrl = getRequiredMapParam(queryMap, URL_ARGUMENT); - if (operation.equals(ADD_OPERATION)) { - - locationDB.addArcUrl(arcName, arcUrl); - message = "OK added url " + arcUrl + " for " + arcName; - - } else if (operation.equals(REMOVE_OPERATION)) { - - getLocationDB().removeArcUrl(arcName, arcUrl); - message = "OK removed url " + arcUrl + " for 
" + arcName; - - } else { - - throw new ParseException("Unknown operation. Must be one " - + "of " + LOOKUP_OPERATION + "," + ADD_OPERATION - + ", or " + REMOVE_OPERATION + ".", 0); - } - } - - } catch (DatabaseException e) { - e.printStackTrace(); - message = e.getMessage(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - message = e.getMessage(); - } - return message; - } - - /** - * @return the locationDB - */ - public FileLocationDB getLocationDB() { - return locationDB; - } - - /** - * @param locationDB the locationDB to set - */ - public void setLocationDB(FileLocationDB locationDB) { - this.locationDB = locationDB; - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java (from rev 2282, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileProxyServlet.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -0,0 +1,124 @@ +/* ArcProxyServlet + * + * $Id$ + * + * Created on 6:19:54 PM Aug 10, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.net.URLConnection; +import java.text.ParseException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB; +import org.archive.wayback.webapp.ServletRequestContext; + +import com.sleepycat.je.DatabaseException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FileProxyServlet extends ServletRequestContext { + + private static final String RANGE_HTTP_HEADER = "Range"; + private static final String CONTENT_TYPE_HEADER = "Content-Type"; + private static final String CONTENT_TYPE = "application/x-gzip"; + /** + * + */ + private static final long serialVersionUID = 1L; + private ResourceFileLocationDB locationDB = null; + + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws IOException, + ServletException { + + try { + String arc = httpRequest.getRequestURI(); + arc = arc.substring(arc.lastIndexOf('/')+1); + if(arc.length() == 0) { + throw new ParseException("no/invalid arc",0); + } + String urls[] = locationDB.arcToUrls(arc); + if(urls == null || urls.length == 0) { + throw new DatabaseException("Unable to locate("+arc+")"); + } + String urlString = urls[0]; + String rangeHeader = httpRequest.getHeader(RANGE_HTTP_HEADER); + URL url = new URL(urlString); + URLConnection conn = url.openConnection(); + if(rangeHeader != null) { + conn.addRequestProperty(RANGE_HTTP_HEADER,rangeHeader); + 
} + InputStream is = conn.getInputStream(); + httpResponse.setStatus(HttpServletResponse.SC_OK); + String typeHeader = conn.getHeaderField(CONTENT_TYPE_HEADER); + if(typeHeader == null) { + typeHeader = CONTENT_TYPE; + } + httpResponse.setContentType(typeHeader); + OutputStream os = httpResponse.getOutputStream(); + int BUF_SIZE = 4096; + byte[] buffer = new byte[BUF_SIZE]; + try { + for (int r = -1; (r = is.read(buffer, 0, BUF_SIZE)) != -1;) { + os.write(buffer, 0, r); + } + } finally { + is.close(); + } + } catch (ParseException e) { + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, + e.getMessage()); + } catch (DatabaseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, + e.getMessage()); + } + return true; + } + + /** + * @return the locationDB + */ + public ResourceFileLocationDB getLocationDB() { + return locationDB; + } + + /** + * @param locationDB the locationDB to set + */ + public void setLocationDB(ResourceFileLocationDB locationDB) { + this.locationDB = locationDB; + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java (from rev 2282, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDB.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -0,0 +1,305 @@ +/* FileLocationDB + * + * $Id$ + * + * Created on 3:08:59 PM Aug 18, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. 
+ * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.archive.wayback.bdb.BDBRecordSet; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.util.CloseableIterator; + +import com.sleepycat.je.DatabaseException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ResourceFileLocationDB extends BDBRecordSet { + + /** + * String id for implementation class of FileLocationDBs. 
+ */ + public static final String FILE_LOCATION_DB_CLASS = "filelocationdb"; + + protected static final String ARC_DB_PATH = "filelocationdb.path"; + + protected static final String ARC_DB_NAME = "filelocationdb.name"; + + protected static final String ARC_DB_LOG = "filelocationdb.logpath"; + + private final static String urlDelimiter = " "; + + private final static String urlDelimiterRE = " "; + + private ResourceFileLocationDBLog log; + private String logPath = null; + private String bdbPath = null; + private String bdbName = null; + + /** + * Constructor + */ + public ResourceFileLocationDB() { + super(); + } + + /** + * @throws DatabaseException + * @throws ConfigurationException + */ + public void init() throws DatabaseException, ConfigurationException { + if(logPath == null) { + throw new ConfigurationException("No logPath"); + } + log = new ResourceFileLocationDBLog(logPath); + initializeDB(bdbPath,bdbName); + } + + /** + * return an array of String URLs for all known locations of the ARC file + * in the DB. + * @param arcName + * @return String[] of URLs to arcName + * @throws DatabaseException + */ + public String[] arcToUrls(final String arcName) throws DatabaseException { + + String[] arcUrls = null; + String valueString = get(arcName); + if(valueString != null && valueString.length() > 0) { + arcUrls = valueString.split(urlDelimiterRE); + } + return arcUrls; + } + + /** + * add an Url location for an arcName, unless it already exists + * @param arcName + * @param arcUrl + * @throws DatabaseException + * @throws IOException + */ + public void addArcUrl(final String arcName, final String arcUrl) throws DatabaseException, IOException { + + // need to first see if there is already an entry for this arcName. + // if not, add arcUrl as the value. + // if so, check the current arcUrl locations for arcName + // if arcUrl exists, do nothing + // if arcUrl does not exist, add, and set that as the value. 
+ + String newValue = null; + String oldValue = get(arcName); + if(oldValue != null && oldValue.length() > 0) { + String curUrls[] = oldValue.split(urlDelimiterRE); + boolean found = false; + for(int i=0; i < curUrls.length; i++) { + if(arcUrl.equals(curUrls[i])) { + found = true; + break; + } + } + if(found == false) { + newValue = oldValue + " " + arcUrl; + } + } else { + // null or empty value + newValue = arcUrl; + if(oldValue == null) log.addArc(arcName); + } + + // did we find a value? + if(newValue != null) { + put(arcName,newValue); + } + } + + /** + * remove a single Url location for an arcName, if it exists + * @param arcName + * @param arcUrl + * @throws DatabaseException + */ + public void removeArcUrl(final String arcName, final String arcUrl) throws DatabaseException { + // need to first see if there is already an entry for this arcName. + // if not, do nothing + // if so, loop thru all current arcUrl locations for arcName + // keep any that are not arcUrl + // if any locations are left, update to the new value, sans arcUrl + // if none are left, remove the entry from the db + + StringBuilder newValue = new StringBuilder(); + String oldValue = get(arcName); + if(oldValue != null && oldValue.length() > 0) { + String curUrls[] = oldValue.split(urlDelimiterRE); + + for(int i=0; i < curUrls.length; i++) { + if(!arcUrl.equals(curUrls[i])) { + if(newValue.length() > 0) { + newValue.append(urlDelimiter); + } + newValue.append(curUrls[i]); + } + } + + if(newValue.length() > 0) { + + // update + put(arcName, newValue.toString()); + + } else { + + // remove the entry: + delete(arcName); + } + } + } + + /** + * @param start + * @param end + * @return Iterator for traversing arcs between start and end. + * @throws IOException + */ + public CloseableIterator<String> getArcsBetweenMarks(long start, long end) + throws IOException { + return log.getArcsBetweenMarks(start, end); + } + + /** + * @return current "Mark" for the log. 
Currently, it's just the length of + * the log file. + */ + public long getCurrentMark() { + return log.getCurrentMark(); + } + + /** + * @return the logPath + */ + public String getLogPath() { + return logPath; + } + + /** + * @param logPath the logPath to set + */ + public void setLogPath(String logPath) { + this.logPath = logPath; + } + + /** + * @return the bdbPath + */ + public String getBdbPath() { + return bdbPath; + } + + /** + * @param bdbPath the bdbPath to set + */ + public void setBdbPath(String bdbPath) { + this.bdbPath = bdbPath; + } + + /** + * @return the bdbName + */ + public String getBdbName() { + return bdbName; + } + + /** + * @param bdbName the bdbName to set + */ + public void setBdbName(String bdbName) { + this.bdbName = bdbName; + } + private static void USAGE(String message) { + System.err.print("USAGE: " + message + "\n" + + "\tDBDIR DBNAME LOGPATH\n" + + "\n" + + "\t\tread lines from STDIN formatted like:\n" + + "\t\t\tNAME<SPACE>URL\n" + + "\t\tand for each line, add to locationDB that file NAME is\n" + + "\t\tlocated at URL. 
Use locationDB in DBDIR at DBNAME, \n" + + "\t\tcreating if it does not exist.\n" + ); + System.exit(2); + } + + /** + * @param args + */ + public static void main(String[] args) { + if(args.length != 3) { + USAGE(""); + System.exit(1); + } + String bdbPath = args[0]; + String bdbName = args[1]; + String logPath = args[2]; + ResourceFileLocationDB db = new ResourceFileLocationDB(); + db.setBdbPath(bdbPath); + db.setBdbName(bdbName); + db.setLogPath(logPath); + BufferedReader r = new BufferedReader( + new InputStreamReader(System.in)); + String line; + int exitCode = 0; + try { + db.init(); + while((line = r.readLine()) != null) { + String parts[] = line.split(" "); + if(parts.length != 2) { + System.err.println("Bad input(" + line + ")"); + System.exit(2); + } + db.addArcUrl(parts[0],parts[1]); + System.out.println("Added\t" + parts[0] + "\t" + parts[1]); + } + } catch (IOException e) { + e.printStackTrace(); + exitCode = 1; + } catch (DatabaseException e) { + e.printStackTrace(); + exitCode = 1; + } catch (ConfigurationException e) { + e.printStackTrace(); + exitCode = 1; + } finally { + try { + db.shutdownDB(); + } catch (DatabaseException e) { + e.printStackTrace(); + exitCode = 1; + } + } + System.exit(exitCode); + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java (from rev 2282, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ResourceFileLocationDBClient.java 2008-06-05 20:42:53 UTC (rev 2284) @@ -0,0 +1,416 @@ +/* FileLocationDBClient 
+ * + * $Id$ + * + * Created on 5:59:49 PM Aug 21, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Arrays; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpStatus; +import org.apache.commons.httpclient.NameValuePair; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.methods.PostMethod; +import org.apache.commons.httpclient.util.ParameterFormatter; +import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDBServlet; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ResourceFileLocationDBClient { + private static final Logger LOGGER = Logger.getLogger(ResourceFileLocationDBClient + .class.getName()); + + private final static String ARC_SUFFIX = ".arc"; + private final static String ARC_GZ_SUFFIX = ".arc.gz"; + private final static String WARC_SUFFIX = ".warc"; + private final static String WARC_GZ_SUFFIX = ".warc.gz"; + private 
final static String OK_RESPONSE_PREFIX = "OK "; + private HttpClient client = null; + + private String serverUrl = null; + + /** + * @param serverUrl + */ + public ResourceFileLocationDBClient(final String serverUrl) { + super(); + this.serverUrl = serverUrl; + this.client = new HttpClient(); + } + + /** + * @return long value representing the current end "mark" of the db log + * @throws NumberFormatException + * @throws IOException + */ + public long getCurrentMark() throws NumberFormatException, IOException { + NameValuePair[] args = { + new NameValuePair( + ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + ResourceFileLocationDBServlet.GETMARK_OPERATION), + }; + return Long.parseLong(doGetMethod(args)); + } + + /** + * @param start + * @param end + * @return Iterator of arc file names between marks start and end + * @throws IOException + */ + public Iterator<String> getArcsBetweenMarks(long start, long end) + throws IOException { + NameValuePair[] args = { + new NameValuePair( + ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + ResourceFileLocationDBServlet.GETRANGE_OPERATION), + new NameValuePair( + ResourceFileLocationDBServlet.START_ARGUMENT, + String.valueOf(start)), + new NameValuePair( + ResourceFileLocationDBServlet.END_ARGUMENT, + String.valueOf(end)) + }; + return Arrays.asList(doGetMethod(args).split("\n")).iterator(); + } + + /** + * return an array of String URLs for all known locations of the ARC file + * in the DB. 
+ * @param arcName + * @return String[] of URLs to arcName + * @throws IOException + */ + public String[] arcToUrls(final String arcName) throws IOException { + + NameValuePair[] args = { + new NameValuePair( + ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + ResourceFileLocationDBServlet.LOOKUP_OPERATION), + + new NameValuePair( + ResourceFileLocationDBServlet.NAME_ARGUMENT, + arcName) + }; + String locations = doGetMethod(args); + if(locations != null) { + return locations.split("\n"); + } + return null; + } + + + /** + * add an Url location for an arcName, unless it already exists + * @param arcName + * @param arcUrl + * @throws IOException + */ + public void addArcUrl(final String arcName, final String arcUrl) + throws IOException { + doPostMethod(ResourceFileLocationDBServlet.ADD_OPERATION, arcName, arcUrl); + } + + /** + * remove a single Url location for an arcName, if it exists + * @param arcName + * @param arcUrl + * @throws IOException + */ + public void removeArcUrl(final String arcName, final String arcUrl) + throws IOException { + doPostMethod(ResourceFileLocationDBServlet.REMOVE_OPERATION, arcName, arcUrl); + } + + private String doGetMethod(NameValuePair[] data) throws IOException { + ParameterFormatter formatter = new ParameterFormatter(); + formatter.setAlwaysUseQuotes(false); + StringBuilder finalUrl = new StringBuilder(serverUrl); + if(data.length > 0) { + finalUrl.append("?"); + } + for(int i = 0; i < data.length; i++) { + if(i == 0) { + finalUrl.append("?"); + } else { + finalUrl.append("&"); + } + finalUrl.append(formatter.format(data[i])); + } + + GetMethod method = new GetMethod(finalUrl.toString()); + + int statusCode = client.executeMethod(method); + if (statusCode != HttpStatus.SC_OK) { + throw new IOException("Method failed: " + method.getStatusLine()); + } + String responseString = method.getResponseBodyAsString(); + if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { + 
if(responseString.startsWith(ResourceFileLocationDBServlet.NO_LOCATION_PREFIX)) { + return null; + } + throw new IOException(responseString); + } + return responseString.substring(OK_RESPONSE_PREFIX.length()+1); + } + + private void doPostMethod(final String operation, final String arcName, + final String arcUrl) + throws IOException { + PostMethod method = new PostMethod(serverUrl); + NameValuePair[] data = { + new NameValuePair(ResourceFileLocationDBServlet.OPERATION_ARGUMENT, + operation), + new NameValuePair(ResourceFileLocationDBServlet.NAME_ARGUMENT, + arcName), + new NameValuePair(ResourceFileLocationDBServlet.URL_ARGUMENT, + arcUrl) + }; + method.setRequestBody(data); + int statusCode = client.executeMethod(method); + if (statusCode != HttpStatus.SC_OK) { + throw new IOException("Method failed: " + method.getStatusLine()); + } + String responseString = method.getResponseBodyAsString(); + if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { + throw new IOException(responseString); + } + } + + private static void USAGE(String message) { + System.err.print("USAGE: " + message + "\n" + + "\t[lookup|add|remove|sync] ...\n" + + "\n" + + "\t lookup LOCATION-DB-URL ARC\n" + + "\t\temit all known URLs for arc ARC\n" + + "\n" + + "\t add LOCATION-DB-URL ARC URL\n" + + "\t\tinform locationDB that ARC is located at URL\n" + + "\n" + + "\t remove LOCATION-DB-URL ARC URL\n" + + "\t\tremove reference to ARC at URL in locationDB\n" + + "\n" + + "\t sync LOCATION-DB-URL DIR DIR-URL\n" + + "\t\tscan directory DIR, and submit all ARC files therein\n" + + "\t\tto locationDB at... [truncated message content] |
From: <bra...@us...> - 2008-06-05 20:39:07
|
Revision: 2283 http://archive-access.svn.sourceforge.net/archive-access/?rev=2283&view=rev Author: bradtofel Date: 2008-06-05 13:39:13 -0700 (Thu, 05 Jun 2008) Log Message: ----------- EMPTIED Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/ArcProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/ArcProxyServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/ArcProxyServlet.java 2008-06-05 20:38:55 UTC (rev 2282) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/ArcProxyServlet.java 2008-06-05 20:39:13 UTC (rev 2283) @@ -1,124 +0,0 @@ -/* ArcProxyServlet - * - * $Id$ - * - * Created on 6:19:54 PM Aug 10, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URL; -import java.net.URLConnection; -import java.text.ParseException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.resourcestore.http.FileLocationDB; -import org.archive.wayback.webapp.ServletRequestContext; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class ArcProxyServlet extends ServletRequestContext { - - private static final String RANGE_HTTP_HEADER = "Range"; - private static final String CONTENT_TYPE_HEADER = "Content-Type"; - private static final String CONTENT_TYPE = "application/x-gzip"; - /** - * - */ - private static final long serialVersionUID = 1L; - private FileLocationDB locationDB = null; - - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws IOException, - ServletException { - - try { - String arc = httpRequest.getRequestURI(); - arc = arc.substring(arc.lastIndexOf('/')+1); - if(arc.length() == 0) { - throw new ParseException("no/invalid arc",0); - } - String urls[] = locationDB.arcToUrls(arc); - if(urls == null || urls.length == 0) { - throw new DatabaseException("Unable to locate("+arc+")"); - } - String urlString = urls[0]; - String rangeHeader = httpRequest.getHeader(RANGE_HTTP_HEADER); - URL url = new URL(urlString); - URLConnection conn = url.openConnection(); - if(rangeHeader != null) { - conn.addRequestProperty(RANGE_HTTP_HEADER,rangeHeader); - } - InputStream is = 
conn.getInputStream(); - httpResponse.setStatus(HttpServletResponse.SC_OK); - String typeHeader = conn.getHeaderField(CONTENT_TYPE_HEADER); - if(typeHeader == null) { - typeHeader = CONTENT_TYPE; - } - httpResponse.setContentType(typeHeader); - OutputStream os = httpResponse.getOutputStream(); - int BUF_SIZE = 4096; - byte[] buffer = new byte[BUF_SIZE]; - try { - for (int r = -1; (r = is.read(buffer, 0, BUF_SIZE)) != -1;) { - os.write(buffer, 0, r); - } - } finally { - is.close(); - } - } catch (ParseException e) { - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, - e.getMessage()); - } catch (DatabaseException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, - e.getMessage()); - } - return true; - } - - /** - * @return the locationDB - */ - public FileLocationDB getLocationDB() { - return locationDB; - } - - /** - * @param locationDB the locationDB to set - */ - public void setLocationDB(FileLocationDB locationDB) { - this.locationDB = locationDB; - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java 2008-06-05 20:38:55 UTC (rev 2282) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java 2008-06-05 20:39:13 UTC (rev 2283) @@ -1,305 +0,0 @@ -/* FileLocationDB - * - * $Id$ - * - * Created on 3:08:59 PM Aug 18, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. 
- * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; - -import org.archive.wayback.bdb.BDBRecordSet; -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.util.CloseableIterator; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDB extends BDBRecordSet { - - /** - * String id for implementation class of FileLocationDBs. 
- */ - public static final String FILE_LOCATION_DB_CLASS = "filelocationdb"; - - protected static final String ARC_DB_PATH = "filelocationdb.path"; - - protected static final String ARC_DB_NAME = "filelocationdb.name"; - - protected static final String ARC_DB_LOG = "filelocationdb.logpath"; - - private final static String urlDelimiter = " "; - - private final static String urlDelimiterRE = " "; - - private FileLocationDBLog log; - private String logPath = null; - private String bdbPath = null; - private String bdbName = null; - - /** - * Constructor - */ - public FileLocationDB() { - super(); - } - - /** - * @throws DatabaseException - * @throws ConfigurationException - */ - public void init() throws DatabaseException, ConfigurationException { - if(logPath == null) { - throw new ConfigurationException("No logPath"); - } - log = new FileLocationDBLog(logPath); - initializeDB(bdbPath,bdbName); - } - - /** - * return an array of String URLs for all known locations of the ARC file - * in the DB. - * @param arcName - * @return String[] of URLs to arcName - * @throws DatabaseException - */ - public String[] arcToUrls(final String arcName) throws DatabaseException { - - String[] arcUrls = null; - String valueString = get(arcName); - if(valueString != null && valueString.length() > 0) { - arcUrls = valueString.split(urlDelimiterRE); - } - return arcUrls; - } - - /** - * add an Url location for an arcName, unless it already exists - * @param arcName - * @param arcUrl - * @throws DatabaseException - * @throws IOException - */ - public void addArcUrl(final String arcName, final String arcUrl) throws DatabaseException, IOException { - - // need to first see if there is already an entry for this arcName. - // if not, add arcUrl as the value. - // if so, check the current arcUrl locations for arcName - // if arcUrl exists, do nothing - // if arcUrl does not exist, add, and set that as the value. 
- - String newValue = null; - String oldValue = get(arcName); - if(oldValue != null && oldValue.length() > 0) { - String curUrls[] = oldValue.split(urlDelimiterRE); - boolean found = false; - for(int i=0; i < curUrls.length; i++) { - if(arcUrl.equals(curUrls[i])) { - found = true; - break; - } - } - if(found == false) { - newValue = oldValue + " " + arcUrl; - } - } else { - // null or empty value - newValue = arcUrl; - if(oldValue == null) log.addArc(arcName); - } - - // did we find a value? - if(newValue != null) { - put(arcName,newValue); - } - } - - /** - * remove a single Url location for an arcName, if it exists - * @param arcName - * @param arcUrl - * @throws DatabaseException - */ - public void removeArcUrl(final String arcName, final String arcUrl) throws DatabaseException { - // need to first see if there is already an entry for this arcName. - // if not, do nothing - // if so, loop thru all current arcUrl locations for arcName - // keep any that are not arcUrl - // if any locations are left, update to the new value, sans arcUrl - // if none are left, remove the entry from the db - - StringBuilder newValue = new StringBuilder(); - String oldValue = get(arcName); - if(oldValue != null && oldValue.length() > 0) { - String curUrls[] = oldValue.split(urlDelimiterRE); - - for(int i=0; i < curUrls.length; i++) { - if(!arcUrl.equals(curUrls[i])) { - if(newValue.length() > 0) { - newValue.append(urlDelimiter); - } - newValue.append(curUrls[i]); - } - } - - if(newValue.length() > 0) { - - // update - put(arcName, newValue.toString()); - - } else { - - // remove the entry: - delete(arcName); - } - } - } - - /** - * @param start - * @param end - * @return Iterator for traversing arcs between start and end. - * @throws IOException - */ - public CloseableIterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - return log.getArcsBetweenMarks(start, end); - } - - /** - * @return current "Mark" for the log. 
Currently, it's just the length of - * the log file. - */ - public long getCurrentMark() { - return log.getCurrentMark(); - } - - /** - * @return the logPath - */ - public String getLogPath() { - return logPath; - } - - /** - * @param logPath the logPath to set - */ - public void setLogPath(String logPath) { - this.logPath = logPath; - } - - /** - * @return the bdbPath - */ - public String getBdbPath() { - return bdbPath; - } - - /** - * @param bdbPath the bdbPath to set - */ - public void setBdbPath(String bdbPath) { - this.bdbPath = bdbPath; - } - - /** - * @return the bdbName - */ - public String getBdbName() { - return bdbName; - } - - /** - * @param bdbName the bdbName to set - */ - public void setBdbName(String bdbName) { - this.bdbName = bdbName; - } - private static void USAGE(String message) { - System.err.print("USAGE: " + message + "\n" + - "\tDBDIR DBNAME LOGPATH\n" + - "\n" + - "\t\tread lines from STDIN formatted like:\n" + - "\t\t\tNAME<SPACE>URL\n" + - "\t\tand for each line, add to locationDB that file NAME is\n" + - "\t\tlocated at URL. 
Use locationDB in DBDIR at DBNAME, \n" + - "\t\tcreating if it does not exist.\n" - ); - System.exit(2); - } - - /** - * @param args - */ - public static void main(String[] args) { - if(args.length != 3) { - USAGE(""); - System.exit(1); - } - String bdbPath = args[0]; - String bdbName = args[1]; - String logPath = args[2]; - FileLocationDB db = new FileLocationDB(); - db.setBdbPath(bdbPath); - db.setBdbName(bdbName); - db.setLogPath(logPath); - BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); - String line; - int exitCode = 0; - try { - db.init(); - while((line = r.readLine()) != null) { - String parts[] = line.split(" "); - if(parts.length != 2) { - System.err.println("Bad input(" + line + ")"); - System.exit(2); - } - db.addArcUrl(parts[0],parts[1]); - System.out.println("Added\t" + parts[0] + "\t" + parts[1]); - } - } catch (IOException e) { - e.printStackTrace(); - exitCode = 1; - } catch (DatabaseException e) { - e.printStackTrace(); - exitCode = 1; - } catch (ConfigurationException e) { - e.printStackTrace(); - exitCode = 1; - } finally { - try { - db.shutdownDB(); - } catch (DatabaseException e) { - e.printStackTrace(); - exitCode = 1; - } - } - System.exit(exitCode); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java 2008-06-05 20:38:55 UTC (rev 2282) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java 2008-06-05 20:39:13 UTC (rev 2283) @@ -1,416 +0,0 @@ -/* FileLocationDBClient - * - * $Id$ - * - * Created on 5:59:49 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. 
- * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileFilter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.Arrays; -import java.util.Iterator; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpStatus; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.PostMethod; -import org.apache.commons.httpclient.util.ParameterFormatter; -import org.archive.wayback.resourcestore.http.FileLocationDBServlet; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBClient { - private static final Logger LOGGER = Logger.getLogger(FileLocationDBClient - .class.getName()); - - private final static String ARC_SUFFIX = ".arc"; - private final static String ARC_GZ_SUFFIX = ".arc.gz"; - private final static String WARC_SUFFIX = ".warc"; - private final static String WARC_GZ_SUFFIX = ".warc.gz"; - private final static String OK_RESPONSE_PREFIX = "OK "; - private HttpClient client = null; - - private String serverUrl = null; - - /** - * @param serverUrl - */ - public 
FileLocationDBClient(final String serverUrl) { - super(); - this.serverUrl = serverUrl; - this.client = new HttpClient(); - } - - /** - * @return long value representing the current end "mark" of the db log - * @throws NumberFormatException - * @throws IOException - */ - public long getCurrentMark() throws NumberFormatException, IOException { - NameValuePair[] args = { - new NameValuePair( - FileLocationDBServlet.OPERATION_ARGUMENT, - FileLocationDBServlet.GETMARK_OPERATION), - }; - return Long.parseLong(doGetMethod(args)); - } - - /** - * @param start - * @param end - * @return Iterator of arc file names between marks start and end - * @throws IOException - */ - public Iterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - NameValuePair[] args = { - new NameValuePair( - FileLocationDBServlet.OPERATION_ARGUMENT, - FileLocationDBServlet.GETRANGE_OPERATION), - new NameValuePair( - FileLocationDBServlet.START_ARGUMENT, - String.valueOf(start)), - new NameValuePair( - FileLocationDBServlet.END_ARGUMENT, - String.valueOf(end)) - }; - return Arrays.asList(doGetMethod(args).split("\n")).iterator(); - } - - /** - * return an array of String URLs for all known locations of the ARC file - * in the DB. 
- * @param arcName - * @return String[] of URLs to arcName - * @throws IOException - */ - public String[] arcToUrls(final String arcName) throws IOException { - - NameValuePair[] args = { - new NameValuePair( - FileLocationDBServlet.OPERATION_ARGUMENT, - FileLocationDBServlet.LOOKUP_OPERATION), - - new NameValuePair( - FileLocationDBServlet.NAME_ARGUMENT, - arcName) - }; - String locations = doGetMethod(args); - if(locations != null) { - return locations.split("\n"); - } - return null; - } - - - /** - * add an Url location for an arcName, unless it already exists - * @param arcName - * @param arcUrl - * @throws IOException - */ - public void addArcUrl(final String arcName, final String arcUrl) - throws IOException { - doPostMethod(FileLocationDBServlet.ADD_OPERATION, arcName, arcUrl); - } - - /** - * remove a single Url location for an arcName, if it exists - * @param arcName - * @param arcUrl - * @throws IOException - */ - public void removeArcUrl(final String arcName, final String arcUrl) - throws IOException { - doPostMethod(FileLocationDBServlet.REMOVE_OPERATION, arcName, arcUrl); - } - - private String doGetMethod(NameValuePair[] data) throws IOException { - ParameterFormatter formatter = new ParameterFormatter(); - formatter.setAlwaysUseQuotes(false); - StringBuilder finalUrl = new StringBuilder(serverUrl); - if(data.length > 0) { - finalUrl.append("?"); - } - for(int i = 0; i < data.length; i++) { - if(i == 0) { - finalUrl.append("?"); - } else { - finalUrl.append("&"); - } - finalUrl.append(formatter.format(data[i])); - } - - GetMethod method = new GetMethod(finalUrl.toString()); - - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine()); - } - String responseString = method.getResponseBodyAsString(); - if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { - if(responseString.startsWith(FileLocationDBServlet.NO_LOCATION_PREFIX)) { - return null; - } - throw 
new IOException(responseString); - } - return responseString.substring(OK_RESPONSE_PREFIX.length()+1); - } - - private void doPostMethod(final String operation, final String arcName, - final String arcUrl) - throws IOException { - PostMethod method = new PostMethod(serverUrl); - NameValuePair[] data = { - new NameValuePair(FileLocationDBServlet.OPERATION_ARGUMENT, - operation), - new NameValuePair(FileLocationDBServlet.NAME_ARGUMENT, - arcName), - new NameValuePair(FileLocationDBServlet.URL_ARGUMENT, - arcUrl) - }; - method.setRequestBody(data); - int statusCode = client.executeMethod(method); - if (statusCode != HttpStatus.SC_OK) { - throw new IOException("Method failed: " + method.getStatusLine()); - } - String responseString = method.getResponseBodyAsString(); - if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { - throw new IOException(responseString); - } - } - - private static void USAGE(String message) { - System.err.print("USAGE: " + message + "\n" + - "\t[lookup|add|remove|sync] ...\n" + - "\n" + - "\t lookup LOCATION-DB-URL ARC\n" + - "\t\temit all known URLs for arc ARC\n" + - "\n" + - "\t add LOCATION-DB-URL ARC URL\n" + - "\t\tinform locationDB that ARC is located at URL\n" + - "\n" + - "\t remove LOCATION-DB-URL ARC URL\n" + - "\t\tremove reference to ARC at URL in locationDB\n" + - "\n" + - "\t sync LOCATION-DB-URL DIR DIR-URL\n" + - "\t\tscan directory DIR, and submit all ARC files therein\n" + - "\t\tto locationDB at url DIR-URL/ARC\n" + - "\n" + - "\t get-mark LOCATION-DB-URL\n" + - "\t\temit an identifier for the current marker in the \n" + - "\t\tlocationDB log. 
These identifiers can be used with the\n" + - "\t\tmark-range operation.\n" + - "\n" + - "\t mark-range LOCATION-DB-URL START END\n" + - "\t\temit to STDOUT one line with the name of all ARC files\n" + - "\t\tadded to the locationDB between marks START and END\n" + - "\n" + - "\t add-stream LOCATION-DB-URL\n" + - "\t\tread lines from STDIN formatted like:\n" + - "\t\t\tNAME<SPACE>URL\n" + - "\t\tand for each line, inform locationDB that file NAME is\n" + - "\t\tlocated at URL\n" - ); - System.exit(2); - } - - /** - * @param args - */ - public static void main(String[] args) { - if(args.length < 2) { - USAGE(""); - System.exit(1); - } - String operation = args[0]; - String url = args[1]; - if(!url.startsWith("http://")) { - USAGE("URL argument 1 must begin with http://"); - } - - FileLocationDBClient locationClient = new FileLocationDBClient(url); - - if(operation.equalsIgnoreCase("add-stream")) { - BufferedReader r = new BufferedReader( - new InputStreamReader(System.in)); - String line; - try { - while((line = r.readLine()) != null) { - String parts[] = line.split(" "); - if(parts.length != 2) { - System.err.println("Bad input(" + line + ")"); - System.exit(2); - } - locationClient.addArcUrl(parts[0],parts[1]); - System.out.println("Added\t" + parts[0] + "\t" + parts[1]); - } - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } - - } else { - if(args.length < 3) { - USAGE(""); - System.exit(1); - } - String arc = args[2]; - if(operation.equalsIgnoreCase("lookup")) { - if(args.length < 3) { - USAGE("lookup LOCATION-URL ARC"); - } - try { - String[] locations = locationClient.arcToUrls(arc); - if(locations == null) { - System.err.println("No locations for " + arc); - System.exit(1); - } - for(int i=0; i <locations.length; i++) { - System.out.println(locations[i]); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("get-mark")) { - if(args.length != 2) { - 
USAGE("get-mark LOCATION-URL"); - } - try { - long mark = locationClient.getCurrentMark(); - System.out.println(mark); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("mark-range")) { - if(args.length != 4) { - USAGE("mark-range LOCATION-URL START END"); - } - long start = Long.parseLong(args[3]); - long end = Long.parseLong(args[4]); - try { - Iterator<String> it = - locationClient.getArcsBetweenMarks(start,end); - while(it.hasNext()) { - String next = (String) it.next(); - System.out.println(next); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - - } else if(operation.equalsIgnoreCase("add")) { - if(args.length != 4) { - USAGE("add LOCATION-URL ARC ARC-URL"); - } - String arcUrl = args[3]; - if(!arcUrl.startsWith("http://")) { - USAGE("ARC-URL argument 4 must begin with http://"); - } - try { - locationClient.addArcUrl(arc,arcUrl); - System.out.println("OK"); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("remove")) { - - if(args.length != 4) { - USAGE("remove LOCATION-URL ARC ARC-URL"); - } - String arcUrl = args[3]; - if(!arcUrl.startsWith("http://")) { - USAGE("ARC-URL argument 4 must begin with http://"); - } - try { - locationClient.removeArcUrl(arc,arcUrl); - System.out.println("OK"); - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else if(operation.equalsIgnoreCase("sync")) { - - if(args.length != 4) { - USAGE("sync LOCATION-URL DIR DIR-URL"); - } - File dir = new File(arc); - String dirUrl = args[3]; - if(!dirUrl.startsWith("http://")) { - USAGE("DIR-URL argument 4 must begin with http://"); - } - try { - if(!dir.isDirectory()) { - USAGE("DIR " + arc + " is not a directory"); - } - - FileFilter filter = new FileFilter() { - public boolean accept(File daFile) { - return daFile.isFile() && - 
(daFile.getName().endsWith(ARC_SUFFIX) || - daFile.getName().endsWith(ARC_GZ_SUFFIX) || - daFile.getName().endsWith(WARC_SUFFIX) || - daFile.getName().endsWith(WARC_GZ_SUFFIX)); - } - }; - - File[] files = dir.listFiles(filter); - if(files == null) { - throw new IOException("Directory " + dir.getAbsolutePath() + - " is not a directory or had an IO error"); - } - for(int i = 0; i < files.length; i++) { - File file = files[i]; - String name = file.getName(); - String fileUrl = dirUrl + name; - LOGGER.info("Adding location " + fileUrl + " for file " + name); - locationClient.addArcUrl(name,fileUrl); - } - } catch (IOException e) { - System.err.println(e.getMessage()); - System.exit(1); - } - - } else { - USAGE(" unknown operation " + operation); - } - } - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java 2008-06-05 20:38:55 UTC (rev 2282) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java 2008-06-05 20:39:13 UTC (rev 2283) @@ -1,173 +0,0 @@ -/* FileLocationDBLog - * - * $Id$ - * - * Created on 2:38:18 PM Aug 18, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.RandomAccessFile; - -import org.archive.wayback.exception.ConfigurationException; -import org.archive.wayback.util.CloseableIterator; -import org.archive.wayback.util.flatfile.RecordIterator; - -import sun.reflect.generics.reflectiveObjects.NotImplementedException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBLog extends File { - - /** - * - */ - private static final long serialVersionUID = -9128222006544481378L; - - /** - * @param pathname - * @throws ConfigurationException - */ - public FileLocationDBLog(String pathname) throws ConfigurationException { - super(pathname); - if (!isFile()) { - if (exists()) { - throw new ConfigurationException("path(" + pathname - + ") exists but is not a file!"); - } - try { - if (!createNewFile()) { - throw new ConfigurationException( - "Unable to create empty file " + pathname); - } - } catch (IOException e) { - e.printStackTrace(); - throw new ConfigurationException("Unable to create empty file " - + pathname); - } - } - } - - /** - * @return long value indicating the current end position of the log - */ - public long getCurrentMark() { - return length(); - } - - /** - * @param start - * @param end - * @return CleanableIterator that returns all arcs between start and end - * @throws IOException - */ - public CloseableIterator<String> getArcsBetweenMarks(long start, long end) - throws IOException { - - RandomAccessFile raf = new RandomAccessFile(this, "r"); - raf.seek(start); - BufferedReader is = new BufferedReader(new FileReader(raf.getFD())); - return new 
BufferedRangeIterator(new RecordIterator(is),end - start); - } - - /** - * @param arcName - * @throws IOException - */ - public synchronized void addArc(String arcName) throws IOException { - FileWriter writer = new FileWriter(this, true); - writer.write(arcName + "\n"); - writer.flush(); - writer.close(); - } - - private class BufferedRangeIterator implements CloseableIterator<String> { - private RecordIterator itr; - private long bytesToSend; - private long bytesSent; - private String next; - private boolean done; - /** - * @param itr - * @param bytesToSend - */ - public BufferedRangeIterator(RecordIterator itr, long bytesToSend) { - this.itr = itr; - this.bytesToSend = bytesToSend; - bytesSent = 0; - next = null; - done = false; - } - /* (non-Javadoc) - * @see org.archive.wayback.util.CleanableIterator#clean() - */ - public void close() throws IOException { - if(done == false) { - itr.close(); - done = true; - } - } - - /* (non-Javadoc) - * @see java.util.Iterator#hasNext() - */ - public boolean hasNext() { - if(done) return false; - if(next != null) return true; - if((bytesSent >= bytesToSend) || !itr.hasNext()) { - try { - close(); - } catch (IOException e) { - // TODO This is lame. What is the right way? - throw new RuntimeException(e); - } - return false; - } - next = (String) itr.next(); - return true; - } - - /* (non-Javadoc) - * @see java.util.Iterator#next() - */ - public String next() { - String returnString = next; - next = null; - bytesSent += returnString.length() + 1; // TODO: not X-platform! 
- return returnString; - } - - /* (non-Javadoc) - * @see java.util.Iterator#remove() - */ - public void remove() { - throw new NotImplementedException(); - } - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java 2008-06-05 20:38:55 UTC (rev 2282) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java 2008-06-05 20:39:13 UTC (rev 2283) @@ -1,170 +0,0 @@ -/* FileLocationDBServlet - * - * $Id$ - * - * Created on 5:35:31 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.IOException; -import java.io.OutputStream; -import java.text.ParseException; -import java.util.Iterator; -import java.util.Map; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.archive.wayback.resourcestore.http.FileLocationDB; -import org.archive.wayback.webapp.ServletRequestContext; - -import com.sleepycat.je.DatabaseException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBServlet extends ServletRequestContext { - - protected static final String OPERATION_ARGUMENT = "operation"; - protected static final String NAME_ARGUMENT = "name"; - protected static final String URL_ARGUMENT = "url"; - protected static final String START_ARGUMENT = "start"; - protected static final String END_ARGUMENT = "end"; - protected static final String LOOKUP_OPERATION = "lookup"; - protected static final String GETMARK_OPERATION = "getmark"; - protected static final String GETRANGE_OPERATION = "getrange"; - protected static final String ADD_OPERATION = "add"; - protected static final String REMOVE_OPERATION = "remove"; - protected static final String NO_LOCATION_PREFIX = "ERROR No locations for"; - - private static final long serialVersionUID = 1L; - private FileLocationDB locationDB = null; - - public boolean handleRequest(HttpServletRequest httpRequest, - HttpServletResponse httpResponse) throws IOException, - ServletException { - @SuppressWarnings("unchecked") - Map<String,String[]> queryMap = httpRequest.getParameterMap(); - String message; - FileLocationDB locationDB = getLocationDB(); - try { - message = handleOperation(locationDB,queryMap); - 
httpResponse.setStatus(HttpServletResponse.SC_OK); - httpResponse.setContentType("text/plain"); - OutputStream os = httpResponse.getOutputStream(); - os.write(message.getBytes()); - } catch (ParseException e) { - e.printStackTrace(); - httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, - e.getMessage()); - } - return true; - } - - private String handleOperation(FileLocationDB locationDB, - Map<String,String[]> queryMap) - throws ParseException { - - String operation = getRequiredMapParam(queryMap, OPERATION_ARGUMENT); - String message; - try { - if (operation.equals(LOOKUP_OPERATION)) { - String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); - - message = NO_LOCATION_PREFIX + " " + arcName; - String arcUrls[] = locationDB.arcToUrls(arcName); - if (arcUrls != null && arcUrls.length > 0) { - StringBuffer buf = new StringBuffer("OK "); - for (int i = 0; i < arcUrls.length; i++) { - buf.append("\n"); - buf.append(arcUrls[i]); - } - message = buf.toString(); - } - - } else if (operation.equals(GETMARK_OPERATION)) { - - message = "OK \n" + String.valueOf(locationDB.getCurrentMark()); - - } else if (operation.equals(GETRANGE_OPERATION)) { - - long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT)); - long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT)); - Iterator<String> itr = locationDB.getArcsBetweenMarks(start,end); - StringBuilder str = new StringBuilder(); - str.append("OK "); - while(itr.hasNext()) { - str.append("\n"); - str.append((String)itr.next()); - } - message = str.toString(); - - } else { - - String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); - String arcUrl = getRequiredMapParam(queryMap, URL_ARGUMENT); - if (operation.equals(ADD_OPERATION)) { - - locationDB.addArcUrl(arcName, arcUrl); - message = "OK added url " + arcUrl + " for " + arcName; - - } else if (operation.equals(REMOVE_OPERATION)) { - - getLocationDB().removeArcUrl(arcName, arcUrl); - message = "OK removed url " + arcUrl + " for 
" + arcName; - - } else { - - throw new ParseException("Unknown operation. Must be one " - + "of " + LOOKUP_OPERATION + "," + ADD_OPERATION - + ", or " + REMOVE_OPERATION + ".", 0); - } - } - - } catch (DatabaseException e) { - e.printStackTrace(); - message = e.getMessage(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - message = e.getMessage(); - } - return message; - } - - /** - * @return the locationDB - */ - public FileLocationDB getLocationDB() { - return locationDB; - } - - /** - * @param locationDB the locationDB to set - */ - public void setLocationDB(FileLocationDB locationDB) { - this.locationDB = locationDB; - } -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-05 20:38:48
|
Revision: 2282 http://archive-access.svn.sourceforge.net/archive-access/?rev=2282&view=rev Author: bradtofel Date: 2008-06-05 13:38:55 -0700 (Thu, 05 Jun 2008) Log Message: ----------- MOVED: change package name Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java (from rev 2055, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/ArcProxyServlet.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ArcProxyServlet.java 2008-06-05 20:38:55 UTC (rev 2282) @@ -0,0 +1,124 @@ +/* ArcProxyServlet + * + * $Id$ + * + * Created on 6:19:54 PM Aug 10, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; +import java.net.URLConnection; +import java.text.ParseException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.resourcestore.locationdb.FileLocationDB; +import org.archive.wayback.webapp.ServletRequestContext; + +import com.sleepycat.je.DatabaseException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ArcProxyServlet extends ServletRequestContext { + + private static final String RANGE_HTTP_HEADER = "Range"; + private static final String CONTENT_TYPE_HEADER = "Content-Type"; + private static final String CONTENT_TYPE = "application/x-gzip"; + /** + * + */ + private static final long serialVersionUID = 1L; + private FileLocationDB locationDB = null; + + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws IOException, + ServletException { + + try { + String arc = httpRequest.getRequestURI(); + arc = arc.substring(arc.lastIndexOf('/')+1); + if(arc.length() == 0) { + throw new ParseException("no/invalid arc",0); + } + String urls[] = locationDB.arcToUrls(arc); + if(urls == null || urls.length == 0) { + throw new DatabaseException("Unable to locate("+arc+")"); + } + String urlString = urls[0]; + String rangeHeader = 
httpRequest.getHeader(RANGE_HTTP_HEADER); + URL url = new URL(urlString); + URLConnection conn = url.openConnection(); + if(rangeHeader != null) { + conn.addRequestProperty(RANGE_HTTP_HEADER,rangeHeader); + } + InputStream is = conn.getInputStream(); + httpResponse.setStatus(HttpServletResponse.SC_OK); + String typeHeader = conn.getHeaderField(CONTENT_TYPE_HEADER); + if(typeHeader == null) { + typeHeader = CONTENT_TYPE; + } + httpResponse.setContentType(typeHeader); + OutputStream os = httpResponse.getOutputStream(); + int BUF_SIZE = 4096; + byte[] buffer = new byte[BUF_SIZE]; + try { + for (int r = -1; (r = is.read(buffer, 0, BUF_SIZE)) != -1;) { + os.write(buffer, 0, r); + } + } finally { + is.close(); + } + } catch (ParseException e) { + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, + e.getMessage()); + } catch (DatabaseException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_NOT_FOUND, + e.getMessage()); + } + return true; + } + + /** + * @return the locationDB + */ + public FileLocationDB getLocationDB() { + return locationDB; + } + + /** + * @param locationDB the locationDB to set + */ + public void setLocationDB(FileLocationDB locationDB) { + this.locationDB = locationDB; + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java (from rev 2261, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDB.java 2008-06-05 20:38:55 UTC (rev 2282) @@ -0,0 +1,305 @@ +/* 
FileLocationDB + * + * $Id$ + * + * Created on 3:08:59 PM Aug 18, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.archive.wayback.bdb.BDBRecordSet; +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.util.CloseableIterator; + +import com.sleepycat.je.DatabaseException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FileLocationDB extends BDBRecordSet { + + /** + * String id for implementation class of FileLocationDBs. 
+ */ + public static final String FILE_LOCATION_DB_CLASS = "filelocationdb"; + + protected static final String ARC_DB_PATH = "filelocationdb.path"; + + protected static final String ARC_DB_NAME = "filelocationdb.name"; + + protected static final String ARC_DB_LOG = "filelocationdb.logpath"; + + private final static String urlDelimiter = " "; + + private final static String urlDelimiterRE = " "; + + private FileLocationDBLog log; + private String logPath = null; + private String bdbPath = null; + private String bdbName = null; + + /** + * Constructor + */ + public FileLocationDB() { + super(); + } + + /** + * @throws DatabaseException + * @throws ConfigurationException + */ + public void init() throws DatabaseException, ConfigurationException { + if(logPath == null) { + throw new ConfigurationException("No logPath"); + } + log = new FileLocationDBLog(logPath); + initializeDB(bdbPath,bdbName); + } + + /** + * return an array of String URLs for all known locations of the ARC file + * in the DB. + * @param arcName + * @return String[] of URLs to arcName + * @throws DatabaseException + */ + public String[] arcToUrls(final String arcName) throws DatabaseException { + + String[] arcUrls = null; + String valueString = get(arcName); + if(valueString != null && valueString.length() > 0) { + arcUrls = valueString.split(urlDelimiterRE); + } + return arcUrls; + } + + /** + * add an Url location for an arcName, unless it already exists + * @param arcName + * @param arcUrl + * @throws DatabaseException + * @throws IOException + */ + public void addArcUrl(final String arcName, final String arcUrl) throws DatabaseException, IOException { + + // need to first see if there is already an entry for this arcName. + // if not, add arcUrl as the value. + // if so, check the current arcUrl locations for arcName + // if arcUrl exists, do nothing + // if arcUrl does not exist, add, and set that as the value. 
+ + String newValue = null; + String oldValue = get(arcName); + if(oldValue != null && oldValue.length() > 0) { + String curUrls[] = oldValue.split(urlDelimiterRE); + boolean found = false; + for(int i=0; i < curUrls.length; i++) { + if(arcUrl.equals(curUrls[i])) { + found = true; + break; + } + } + if(found == false) { + newValue = oldValue + " " + arcUrl; + } + } else { + // null or empty value + newValue = arcUrl; + if(oldValue == null) log.addArc(arcName); + } + + // did we find a value? + if(newValue != null) { + put(arcName,newValue); + } + } + + /** + * remove a single Url location for an arcName, if it exists + * @param arcName + * @param arcUrl + * @throws DatabaseException + */ + public void removeArcUrl(final String arcName, final String arcUrl) throws DatabaseException { + // need to first see if there is already an entry for this arcName. + // if not, do nothing + // if so, loop thru all current arcUrl locations for arcName + // keep any that are not arcUrl + // if any locations are left, update to the new value, sans arcUrl + // if none are left, remove the entry from the db + + StringBuilder newValue = new StringBuilder(); + String oldValue = get(arcName); + if(oldValue != null && oldValue.length() > 0) { + String curUrls[] = oldValue.split(urlDelimiterRE); + + for(int i=0; i < curUrls.length; i++) { + if(!arcUrl.equals(curUrls[i])) { + if(newValue.length() > 0) { + newValue.append(urlDelimiter); + } + newValue.append(curUrls[i]); + } + } + + if(newValue.length() > 0) { + + // update + put(arcName, newValue.toString()); + + } else { + + // remove the entry: + delete(arcName); + } + } + } + + /** + * @param start + * @param end + * @return Iterator for traversing arcs between start and end. + * @throws IOException + */ + public CloseableIterator<String> getArcsBetweenMarks(long start, long end) + throws IOException { + return log.getArcsBetweenMarks(start, end); + } + + /** + * @return current "Mark" for the log. 
Currently, it's just the length of + * the log file. + */ + public long getCurrentMark() { + return log.getCurrentMark(); + } + + /** + * @return the logPath + */ + public String getLogPath() { + return logPath; + } + + /** + * @param logPath the logPath to set + */ + public void setLogPath(String logPath) { + this.logPath = logPath; + } + + /** + * @return the bdbPath + */ + public String getBdbPath() { + return bdbPath; + } + + /** + * @param bdbPath the bdbPath to set + */ + public void setBdbPath(String bdbPath) { + this.bdbPath = bdbPath; + } + + /** + * @return the bdbName + */ + public String getBdbName() { + return bdbName; + } + + /** + * @param bdbName the bdbName to set + */ + public void setBdbName(String bdbName) { + this.bdbName = bdbName; + } + private static void USAGE(String message) { + System.err.print("USAGE: " + message + "\n" + + "\tDBDIR DBNAME LOGPATH\n" + + "\n" + + "\t\tread lines from STDIN formatted like:\n" + + "\t\t\tNAME<SPACE>URL\n" + + "\t\tand for each line, add to locationDB that file NAME is\n" + + "\t\tlocated at URL. 
Use locationDB in DBDIR at DBNAME, \n" + + "\t\tcreating if it does not exist.\n" + ); + System.exit(2); + } + + /** + * @param args + */ + public static void main(String[] args) { + if(args.length != 3) { + USAGE(""); + System.exit(1); + } + String bdbPath = args[0]; + String bdbName = args[1]; + String logPath = args[2]; + FileLocationDB db = new FileLocationDB(); + db.setBdbPath(bdbPath); + db.setBdbName(bdbName); + db.setLogPath(logPath); + BufferedReader r = new BufferedReader( + new InputStreamReader(System.in)); + String line; + int exitCode = 0; + try { + db.init(); + while((line = r.readLine()) != null) { + String parts[] = line.split(" "); + if(parts.length != 2) { + System.err.println("Bad input(" + line + ")"); + System.exit(2); + } + db.addArcUrl(parts[0],parts[1]); + System.out.println("Added\t" + parts[0] + "\t" + parts[1]); + } + } catch (IOException e) { + e.printStackTrace(); + exitCode = 1; + } catch (DatabaseException e) { + e.printStackTrace(); + exitCode = 1; + } catch (ConfigurationException e) { + e.printStackTrace(); + exitCode = 1; + } finally { + try { + db.shutdownDB(); + } catch (DatabaseException e) { + e.printStackTrace(); + exitCode = 1; + } + } + System.exit(exitCode); + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java (from rev 2210, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBClient.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBClient.java 2008-06-05 20:38:55 UTC (rev 2282) @@ -0,0 +1,416 @@ +/* FileLocationDBClient + * + * $Id$ + * + * Created on 5:59:49 PM Aug 
21, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Arrays; +import java.util.Iterator; +import java.util.logging.Logger; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpStatus; +import org.apache.commons.httpclient.NameValuePair; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.methods.PostMethod; +import org.apache.commons.httpclient.util.ParameterFormatter; +import org.archive.wayback.resourcestore.locationdb.FileLocationDBServlet; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FileLocationDBClient { + private static final Logger LOGGER = Logger.getLogger(FileLocationDBClient + .class.getName()); + + private final static String ARC_SUFFIX = ".arc"; + private final static String ARC_GZ_SUFFIX = ".arc.gz"; + private final static String WARC_SUFFIX = ".warc"; + private final static String WARC_GZ_SUFFIX = ".warc.gz"; + private final static String OK_RESPONSE_PREFIX = "OK "; + private HttpClient 
client = null; + + private String serverUrl = null; + + /** + * @param serverUrl + */ + public FileLocationDBClient(final String serverUrl) { + super(); + this.serverUrl = serverUrl; + this.client = new HttpClient(); + } + + /** + * @return long value representing the current end "mark" of the db log + * @throws NumberFormatException + * @throws IOException + */ + public long getCurrentMark() throws NumberFormatException, IOException { + NameValuePair[] args = { + new NameValuePair( + FileLocationDBServlet.OPERATION_ARGUMENT, + FileLocationDBServlet.GETMARK_OPERATION), + }; + return Long.parseLong(doGetMethod(args)); + } + + /** + * @param start + * @param end + * @return Iterator of arc file names between marks start and end + * @throws IOException + */ + public Iterator<String> getArcsBetweenMarks(long start, long end) + throws IOException { + NameValuePair[] args = { + new NameValuePair( + FileLocationDBServlet.OPERATION_ARGUMENT, + FileLocationDBServlet.GETRANGE_OPERATION), + new NameValuePair( + FileLocationDBServlet.START_ARGUMENT, + String.valueOf(start)), + new NameValuePair( + FileLocationDBServlet.END_ARGUMENT, + String.valueOf(end)) + }; + return Arrays.asList(doGetMethod(args).split("\n")).iterator(); + } + + /** + * return an array of String URLs for all known locations of the ARC file + * in the DB. 
+ * @param arcName + * @return String[] of URLs to arcName + * @throws IOException + */ + public String[] arcToUrls(final String arcName) throws IOException { + + NameValuePair[] args = { + new NameValuePair( + FileLocationDBServlet.OPERATION_ARGUMENT, + FileLocationDBServlet.LOOKUP_OPERATION), + + new NameValuePair( + FileLocationDBServlet.NAME_ARGUMENT, + arcName) + }; + String locations = doGetMethod(args); + if(locations != null) { + return locations.split("\n"); + } + return null; + } + + + /** + * add an Url location for an arcName, unless it already exists + * @param arcName + * @param arcUrl + * @throws IOException + */ + public void addArcUrl(final String arcName, final String arcUrl) + throws IOException { + doPostMethod(FileLocationDBServlet.ADD_OPERATION, arcName, arcUrl); + } + + /** + * remove a single Url location for an arcName, if it exists + * @param arcName + * @param arcUrl + * @throws IOException + */ + public void removeArcUrl(final String arcName, final String arcUrl) + throws IOException { + doPostMethod(FileLocationDBServlet.REMOVE_OPERATION, arcName, arcUrl); + } + + private String doGetMethod(NameValuePair[] data) throws IOException { + ParameterFormatter formatter = new ParameterFormatter(); + formatter.setAlwaysUseQuotes(false); + StringBuilder finalUrl = new StringBuilder(serverUrl); + if(data.length > 0) { + finalUrl.append("?"); + } + for(int i = 0; i < data.length; i++) { + if(i == 0) { + finalUrl.append("?"); + } else { + finalUrl.append("&"); + } + finalUrl.append(formatter.format(data[i])); + } + + GetMethod method = new GetMethod(finalUrl.toString()); + + int statusCode = client.executeMethod(method); + if (statusCode != HttpStatus.SC_OK) { + throw new IOException("Method failed: " + method.getStatusLine()); + } + String responseString = method.getResponseBodyAsString(); + if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { + if(responseString.startsWith(FileLocationDBServlet.NO_LOCATION_PREFIX)) { + return null; + } + throw 
new IOException(responseString); + } + return responseString.substring(OK_RESPONSE_PREFIX.length()+1); + } + + private void doPostMethod(final String operation, final String arcName, + final String arcUrl) + throws IOException { + PostMethod method = new PostMethod(serverUrl); + NameValuePair[] data = { + new NameValuePair(FileLocationDBServlet.OPERATION_ARGUMENT, + operation), + new NameValuePair(FileLocationDBServlet.NAME_ARGUMENT, + arcName), + new NameValuePair(FileLocationDBServlet.URL_ARGUMENT, + arcUrl) + }; + method.setRequestBody(data); + int statusCode = client.executeMethod(method); + if (statusCode != HttpStatus.SC_OK) { + throw new IOException("Method failed: " + method.getStatusLine()); + } + String responseString = method.getResponseBodyAsString(); + if(!responseString.startsWith(OK_RESPONSE_PREFIX)) { + throw new IOException(responseString); + } + } + + private static void USAGE(String message) { + System.err.print("USAGE: " + message + "\n" + + "\t[lookup|add|remove|sync] ...\n" + + "\n" + + "\t lookup LOCATION-DB-URL ARC\n" + + "\t\temit all known URLs for arc ARC\n" + + "\n" + + "\t add LOCATION-DB-URL ARC URL\n" + + "\t\tinform locationDB that ARC is located at URL\n" + + "\n" + + "\t remove LOCATION-DB-URL ARC URL\n" + + "\t\tremove reference to ARC at URL in locationDB\n" + + "\n" + + "\t sync LOCATION-DB-URL DIR DIR-URL\n" + + "\t\tscan directory DIR, and submit all ARC files therein\n" + + "\t\tto locationDB at url DIR-URL/ARC\n" + + "\n" + + "\t get-mark LOCATION-DB-URL\n" + + "\t\temit an identifier for the current marker in the \n" + + "\t\tlocationDB log. 
These identifiers can be used with the\n" + + "\t\tmark-range operation.\n" + + "\n" + + "\t mark-range LOCATION-DB-URL START END\n" + + "\t\temit to STDOUT one line with the name of all ARC files\n" + + "\t\tadded to the locationDB between marks START and END\n" + + "\n" + + "\t add-stream LOCATION-DB-URL\n" + + "\t\tread lines from STDIN formatted like:\n" + + "\t\t\tNAME<SPACE>URL\n" + + "\t\tand for each line, inform locationDB that file NAME is\n" + + "\t\tlocated at URL\n" + ); + System.exit(2); + } + + /** + * @param args + */ + public static void main(String[] args) { + if(args.length < 2) { + USAGE(""); + System.exit(1); + } + String operation = args[0]; + String url = args[1]; + if(!url.startsWith("http://")) { + USAGE("URL argument 1 must begin with http://"); + } + + FileLocationDBClient locationClient = new FileLocationDBClient(url); + + if(operation.equalsIgnoreCase("add-stream")) { + BufferedReader r = new BufferedReader( + new InputStreamReader(System.in)); + String line; + try { + while((line = r.readLine()) != null) { + String parts[] = line.split(" "); + if(parts.length != 2) { + System.err.println("Bad input(" + line + ")"); + System.exit(2); + } + locationClient.addArcUrl(parts[0],parts[1]); + System.out.println("Added\t" + parts[0] + "\t" + parts[1]); + } + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } + + } else { + if(args.length < 3) { + USAGE(""); + System.exit(1); + } + String arc = args[2]; + if(operation.equalsIgnoreCase("lookup")) { + if(args.length < 3) { + USAGE("lookup LOCATION-URL ARC"); + } + try { + String[] locations = locationClient.arcToUrls(arc); + if(locations == null) { + System.err.println("No locations for " + arc); + System.exit(1); + } + for(int i=0; i <locations.length; i++) { + System.out.println(locations[i]); + } + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("get-mark")) { + if(args.length != 2) { + 
USAGE("get-mark LOCATION-URL"); + } + try { + long mark = locationClient.getCurrentMark(); + System.out.println(mark); + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("mark-range")) { + if(args.length != 4) { + USAGE("mark-range LOCATION-URL START END"); + } + long start = Long.parseLong(args[3]); + long end = Long.parseLong(args[4]); + try { + Iterator<String> it = + locationClient.getArcsBetweenMarks(start,end); + while(it.hasNext()) { + String next = (String) it.next(); + System.out.println(next); + } + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + + } else if(operation.equalsIgnoreCase("add")) { + if(args.length != 4) { + USAGE("add LOCATION-URL ARC ARC-URL"); + } + String arcUrl = args[3]; + if(!arcUrl.startsWith("http://")) { + USAGE("ARC-URL argument 4 must begin with http://"); + } + try { + locationClient.addArcUrl(arc,arcUrl); + System.out.println("OK"); + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("remove")) { + + if(args.length != 4) { + USAGE("remove LOCATION-URL ARC ARC-URL"); + } + String arcUrl = args[3]; + if(!arcUrl.startsWith("http://")) { + USAGE("ARC-URL argument 4 must begin with http://"); + } + try { + locationClient.removeArcUrl(arc,arcUrl); + System.out.println("OK"); + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else if(operation.equalsIgnoreCase("sync")) { + + if(args.length != 4) { + USAGE("sync LOCATION-URL DIR DIR-URL"); + } + File dir = new File(arc); + String dirUrl = args[3]; + if(!dirUrl.startsWith("http://")) { + USAGE("DIR-URL argument 4 must begin with http://"); + } + try { + if(!dir.isDirectory()) { + USAGE("DIR " + arc + " is not a directory"); + } + + FileFilter filter = new FileFilter() { + public boolean accept(File daFile) { + return daFile.isFile() && + 
(daFile.getName().endsWith(ARC_SUFFIX) || + daFile.getName().endsWith(ARC_GZ_SUFFIX) || + daFile.getName().endsWith(WARC_SUFFIX) || + daFile.getName().endsWith(WARC_GZ_SUFFIX)); + } + }; + + File[] files = dir.listFiles(filter); + if(files == null) { + throw new IOException("Directory " + dir.getAbsolutePath() + + " is not a directory or had an IO error"); + } + for(int i = 0; i < files.length; i++) { + File file = files[i]; + String name = file.getName(); + String fileUrl = dirUrl + name; + LOGGER.info("Adding location " + fileUrl + " for file " + name); + locationClient.addArcUrl(name,fileUrl); + } + } catch (IOException e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + } else { + USAGE(" unknown operation " + operation); + } + } + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java (from rev 2055, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBLog.java 2008-06-05 20:38:55 UTC (rev 2282) @@ -0,0 +1,173 @@ +/* FileLocationDBLog + * + * $Id$ + * + * Created on 2:38:18 PM Aug 18, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.RandomAccessFile; + +import org.archive.wayback.exception.ConfigurationException; +import org.archive.wayback.util.CloseableIterator; +import org.archive.wayback.util.flatfile.RecordIterator; + +import sun.reflect.generics.reflectiveObjects.NotImplementedException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FileLocationDBLog extends File { + + /** + * + */ + private static final long serialVersionUID = -9128222006544481378L; + + /** + * @param pathname + * @throws ConfigurationException + */ + public FileLocationDBLog(String pathname) throws ConfigurationException { + super(pathname); + if (!isFile()) { + if (exists()) { + throw new ConfigurationException("path(" + pathname + + ") exists but is not a file!"); + } + try { + if (!createNewFile()) { + throw new ConfigurationException( + "Unable to create empty file " + pathname); + } + } catch (IOException e) { + e.printStackTrace(); + throw new ConfigurationException("Unable to create empty file " + + pathname); + } + } + } + + /** + * @return long value indicating the current end position of the log + */ + public long getCurrentMark() { + return length(); + } + + /** + * @param start + * @param end + * @return CleanableIterator that returns all arcs between start and end + * @throws IOException + */ + public 
CloseableIterator<String> getArcsBetweenMarks(long start, long end) + throws IOException { + + RandomAccessFile raf = new RandomAccessFile(this, "r"); + raf.seek(start); + BufferedReader is = new BufferedReader(new FileReader(raf.getFD())); + return new BufferedRangeIterator(new RecordIterator(is),end - start); + } + + /** + * @param arcName + * @throws IOException + */ + public synchronized void addArc(String arcName) throws IOException { + FileWriter writer = new FileWriter(this, true); + writer.write(arcName + "\n"); + writer.flush(); + writer.close(); + } + + private class BufferedRangeIterator implements CloseableIterator<String> { + private RecordIterator itr; + private long bytesToSend; + private long bytesSent; + private String next; + private boolean done; + /** + * @param itr + * @param bytesToSend + */ + public BufferedRangeIterator(RecordIterator itr, long bytesToSend) { + this.itr = itr; + this.bytesToSend = bytesToSend; + bytesSent = 0; + next = null; + done = false; + } + /* (non-Javadoc) + * @see org.archive.wayback.util.CleanableIterator#clean() + */ + public void close() throws IOException { + if(done == false) { + itr.close(); + done = true; + } + } + + /* (non-Javadoc) + * @see java.util.Iterator#hasNext() + */ + public boolean hasNext() { + if(done) return false; + if(next != null) return true; + if((bytesSent >= bytesToSend) || !itr.hasNext()) { + try { + close(); + } catch (IOException e) { + // TODO This is lame. What is the right way? + throw new RuntimeException(e); + } + return false; + } + next = (String) itr.next(); + return true; + } + + /* (non-Javadoc) + * @see java.util.Iterator#next() + */ + public String next() { + String returnString = next; + next = null; + bytesSent += returnString.length() + 1; // TODO: not X-platform! 
+ return returnString; + } + + /* (non-Javadoc) + * @see java.util.Iterator#remove() + */ + public void remove() { + throw new NotImplementedException(); + } + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java (from rev 2055, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/FileLocationDBServlet.java 2008-06-05 20:38:55 UTC (rev 2282) @@ -0,0 +1,170 @@ +/* FileLocationDBServlet + * + * $Id$ + * + * Created on 5:35:31 PM Aug 21, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.resourcestore.locationdb; + +import java.io.IOException; +import java.io.OutputStream; +import java.text.ParseException; +import java.util.Iterator; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.archive.wayback.resourcestore.locationdb.FileLocationDB; +import org.archive.wayback.webapp.ServletRequestContext; + +import com.sleepycat.je.DatabaseException; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FileLocationDBServlet extends ServletRequestContext { + + protected static final String OPERATION_ARGUMENT = "operation"; + protected static final String NAME_ARGUMENT = "name"; + protected static final String URL_ARGUMENT = "url"; + protected static final String START_ARGUMENT = "start"; + protected static final String END_ARGUMENT = "end"; + protected static final String LOOKUP_OPERATION = "lookup"; + protected static final String GETMARK_OPERATION = "getmark"; + protected static final String GETRANGE_OPERATION = "getrange"; + protected static final String ADD_OPERATION = "add"; + protected static final String REMOVE_OPERATION = "remove"; + protected static final String NO_LOCATION_PREFIX = "ERROR No locations for"; + + private static final long serialVersionUID = 1L; + private FileLocationDB locationDB = null; + + public boolean handleRequest(HttpServletRequest httpRequest, + HttpServletResponse httpResponse) throws IOException, + ServletException { + @SuppressWarnings("unchecked") + Map<String,String[]> queryMap = httpRequest.getParameterMap(); + String message; + FileLocationDB locationDB = getLocationDB(); + try { + message = handleOperation(locationDB,queryMap); + 
httpResponse.setStatus(HttpServletResponse.SC_OK); + httpResponse.setContentType("text/plain"); + OutputStream os = httpResponse.getOutputStream(); + os.write(message.getBytes()); + } catch (ParseException e) { + e.printStackTrace(); + httpResponse.sendError(HttpServletResponse.SC_BAD_REQUEST, + e.getMessage()); + } + return true; + } + + private String handleOperation(FileLocationDB locationDB, + Map<String,String[]> queryMap) + throws ParseException { + + String operation = getRequiredMapParam(queryMap, OPERATION_ARGUMENT); + String message; + try { + if (operation.equals(LOOKUP_OPERATION)) { + String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); + + message = NO_LOCATION_PREFIX + " " + arcName; + String arcUrls[] = locationDB.arcToUrls(arcName); + if (arcUrls != null && arcUrls.length > 0) { + StringBuffer buf = new StringBuffer("OK "); + for (int i = 0; i < arcUrls.length; i++) { + buf.append("\n"); + buf.append(arcUrls[i]); + } + message = buf.toString(); + } + + } else if (operation.equals(GETMARK_OPERATION)) { + + message = "OK \n" + String.valueOf(locationDB.getCurrentMark()); + + } else if (operation.equals(GETRANGE_OPERATION)) { + + long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT)); + long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT)); + Iterator<String> itr = locationDB.getArcsBetweenMarks(start,end); + StringBuilder str = new StringBuilder(); + str.append("OK "); + while(itr.hasNext()) { + str.append("\n"); + str.append((String)itr.next()); + } + message = str.toString(); + + } else { + + String arcName = getRequiredMapParam(queryMap, NAME_ARGUMENT); + String arcUrl = getRequiredMapParam(queryMap, URL_ARGUMENT); + if (operation.equals(ADD_OPERATION)) { + + locationDB.addArcUrl(arcName, arcUrl); + message = "OK added url " + arcUrl + " for " + arcName; + + } else if (operation.equals(REMOVE_OPERATION)) { + + getLocationDB().removeArcUrl(arcName, arcUrl); + message = "OK removed url " + arcUrl + " for 
" + arcName; + + } else { + + throw new ParseException("Unknown operation. Must be one " + + "of " + LOOKUP_OPERATION + "," + ADD_OPERATION + + ", or " + REMOVE_OPERATION + ".", 0); + } + } + + } catch (DatabaseException e) { + e.printStackTrace(); + message = e.getMessage(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + message = e.getMessage(); + } + return message; + } + + /** + * @return the locationDB + */ + public FileLocationDB getLocationDB() { + return locationDB; + } + + /** + * @param locationDB the locationDB to set + */ + public void setLocationDB(FileLocationDB locationDB) { + this.locationDB = locationDB; + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-05 20:36:59
|
Revision: 2281 http://archive-access.svn.sourceforge.net/archive-access/?rev=2281&view=rev Author: bradtofel Date: 2008-06-05 13:36:51 -0700 (Thu, 05 Jun 2008) Log Message: ----------- New package to host locationDB and ArcProxy code. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/locationdb/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |