From: <al...@us...> - 2008-02-08 22:40:49
|
Revision: 2197 http://archive-access.svn.sourceforge.net/archive-access/?rev=2197&view=rev Author: alexoz Date: 2008-02-08 14:40:50 -0800 (Fri, 08 Feb 2008) Log Message: ----------- Added prepare() to client API, to allow cache preparation and parallel remote lookups. * AccessControlClient.java Added prepare() method. * RuleDao.java Added prepare() to interface. * CachingRuleDao.java Added prepare() method and threading logic to do parallel lookups to populate the cache. * HttpRuleDao.java, HibernateRuleDao.java Added no-op prepare(). Modified Paths: -------------- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/CachingRuleDao.java trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/HttpRuleDao.java trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/RuleDao.java trunk/archive-access/projects/access-control/oracle/src/main/java/org/archive/accesscontrol/model/HibernateRuleDao.java Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java 2008-02-08 21:56:01 UTC (rev 2196) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java 2008-02-08 22:40:50 UTC (rev 2197) @@ -1,10 +1,13 @@ package org.archive.accesscontrol; +import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import org.archive.accesscontrol.model.Rule; import org.archive.accesscontrol.model.RuleSet; import org.archive.net.PublicSuffixes; +import org.archive.util.ArchiveUtils; import org.archive.util.SURT; /** @@ -70,6 +73,7 @@ */ public Rule getRule(String url, Date captureDate, Date retrievalDate, String who) { + url = ArchiveUtils.addImpliedHttpIfNecessary(url); String surt = SURT.fromURI(url); String publicSuffix = PublicSuffixes .reduceSurtToTopmostAssigned(getSurtAuthority(surt)); @@ -82,6 +86,24 @@ retrievalDate, who); return matchingRule; } + + + /** + * This method allows the client to prepare for lookups from a given set of + * urls. This can warm up a cache and/or enable a mass data transfer to be done in + * parallel. + * + * @param surts + */ + public void prepare(Collection<String> urls) { + ArrayList<String> publicSuffixes = new ArrayList<String>(urls.size()); + for (String url: urls) { + String surt = SURT.fromURI(ArchiveUtils.addImpliedHttpIfNecessary(url)); + publicSuffixes.add(PublicSuffixes + .reduceSurtToTopmostAssigned(getSurtAuthority(surt))); + } + ruleDao.prepare(publicSuffixes); + } protected String getSurtAuthority(String surt) { int indexOfOpen = surt.indexOf("://("); Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/CachingRuleDao.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/CachingRuleDao.java 2008-02-08 21:56:01 UTC (rev 2196) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/CachingRuleDao.java 2008-02-08 22:40:50 UTC (rev 2197) @@ -1,5 +1,9 @@ package org.archive.accesscontrol; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + import org.archive.accesscontrol.model.RuleSet; /** @@ -9,10 +13,11 @@ * @author aosborne * */ -public class CachingRuleDao extends LruCache<String, RuleSet> implements - RuleDao { +public class CachingRuleDao implements RuleDao { private static final long serialVersionUID = 1L; + private static final int PREPARE_THREAD_COUNT = 5; protected RuleDao ruleDao; + protected LruCache<String, RuleSet> cache = new LruCache<String, RuleSet>(); public CachingRuleDao(RuleDao ruleDao) { super(); @@ -32,11 +37,57 @@ } public RuleSet getRuleTree(String surt) { - RuleSet rules = super.get(surt); + RuleSet rules; + synchronized (cache) { + rules = cache.get(surt); + } if (rules == null) { rules = ruleDao.getRuleTree(surt); - super.put(surt, rules); + synchronized (cache) { + cache.put(surt, rules); + } } return rules; } + + class FetchThread extends Thread { + private List<String> surts; + + public FetchThread(List<String> surts) { + this.surts = surts; + } + + public void run() { + while (true) { + String surt; + synchronized (surts) { + if (surts.isEmpty()) + break; + surt = surts.remove(0); + } + getRuleTree(surt); + } + } + } + + /** + * Prepare the cache to lookup info for a given set of surts. The fetches + * happen in parallel so this also makes a good option for speeding up bulk lookups. + * + * @param surts + */ + public void prepare(Collection<String> surts) { + List<String> safeSurts = new ArrayList<String>(surts); + FetchThread threads[] = new FetchThread[PREPARE_THREAD_COUNT ]; + for (int i = 0; i < PREPARE_THREAD_COUNT ; i++) { + threads[i] = new FetchThread(safeSurts); + threads[i].start(); + } + for (int i = 0; i < PREPARE_THREAD_COUNT ; i++) { + try { + threads[i].join(); + } catch (InterruptedException e) { + } + } + } } Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/HttpRuleDao.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/HttpRuleDao.java 2008-02-08 21:56:01 UTC (rev 2196) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/HttpRuleDao.java 2008-02-08 22:40:50 UTC (rev 2197) @@ -1,9 +1,11 @@ package org.archive.accesscontrol; import java.io.IOException; +import java.util.Collection; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.methods.GetMethod; import org.archive.accesscontrol.model.Rule; import org.archive.accesscontrol.model.RuleSet; @@ -15,13 +17,14 @@ * the REST interface\xCAan oracle. * * For details of the protocol, see: - * http://webteam.archive.org/confluence/display/wayback/Exclusions+API + * http://webteam.archive.org/confluence/display/wayback/Exclusions+API * * @author aosborne * */ public class HttpRuleDao implements RuleDao { - protected HttpClient http = new HttpClient(); + protected HttpClient http = new HttpClient( + new MultiThreadedHttpConnectionManager()); protected XStream xstream = new XStream(); private String oracleUrl; @@ -30,14 +33,14 @@ xstream.alias("rule", Rule.class); xstream.alias("ruleSet", RuleSet.class); } - + /** * @see RuleDao#getRuleTree(String) */ public RuleSet getRuleTree(String surt) { HttpMethod method = new GetMethod(oracleUrl + "/rules/tree/" + surt); RuleSet rules; - + try { http.executeMethod(method); String response = method.getResponseBodyAsString(); @@ -46,7 +49,7 @@ } catch (IOException e) { e.printStackTrace(); return null; - } + } method.releaseConnection(); return rules; } @@ -59,10 +62,15 @@ } /** - * @param oracleUrl the oracleUrl to set + * @param oracleUrl + * the oracleUrl to set */ public void setOracleUrl(String oracleUrl) { this.oracleUrl = oracleUrl; } - + + public void prepare(Collection<String> surts) { + // no-op + } + } Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/RuleDao.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/RuleDao.java 2008-02-08 21:56:01 UTC (rev 2196) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/RuleDao.java 2008-02-08 22:40:50 UTC (rev 2197) @@ -1,11 +1,13 @@ package org.archive.accesscontrol; +import java.util.Collection; + import org.apache.commons.httpclient.URIException; import org.archive.accesscontrol.model.RuleSet; /** - * A RuleDao provides methods for retrieving rule information from a local database or - * remote oracle. + * A RuleDao provides methods for retrieving rule information from a local + * database or remote oracle. * * @author aosborne * @@ -27,4 +29,12 @@ */ public RuleSet getRuleTree(String surt); + /** + * This method allows a RuleDao to prepare for lookups from a given set of + * surts. This can warm up a cache and/or enable a bulk lookup to be done in + * parallel. Many implementations may make it a no-op. + * + * @param surts + */ + public void prepare(Collection<String> surts); } Modified: trunk/archive-access/projects/access-control/oracle/src/main/java/org/archive/accesscontrol/model/HibernateRuleDao.java =================================================================== --- trunk/archive-access/projects/access-control/oracle/src/main/java/org/archive/accesscontrol/model/HibernateRuleDao.java 2008-02-08 21:56:01 UTC (rev 2196) +++ trunk/archive-access/projects/access-control/oracle/src/main/java/org/archive/accesscontrol/model/HibernateRuleDao.java 2008-02-08 22:40:50 UTC (rev 2197) @@ -1,5 +1,6 @@ package org.archive.accesscontrol.model; +import java.util.Collection; import java.util.List; import org.apache.commons.httpclient.URIException; @@ -105,4 +106,8 @@ public void deleteAllRules() { getHibernateTemplate().bulkUpdate("delete from Rule"); } + + public void prepare(Collection<String> surts) { + // no-op + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |