Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/core
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3483/src/java/org/archive/wayback/core

Added Files:
    ResourceResults.java WaybackLogic.java Timestamp.java WMRequest.java
    Resource.java ResourceResult.java
Log Message:
Initial check-in -- pre code review

--- NEW FILE: ResourceResult.java ---
package org.archive.wayback.core;

import java.text.ParseException;

import org.archive.io.arc.ARCLocation;

/**
 * Holds the nine fields of one space-separated index line: URL, timestamp,
 * original host, MIME type, HTTP response code, MD5 fragment, redirect URL,
 * compressed offset and ARC file name.
 */
public class ResourceResult {
    private final static String CDX_HEADER_STRING = " CDX N b h m s k r V g";

    /** Number of space-separated fields {@link #parseLine} requires. */
    private final static int CDX_FIELD_COUNT = 9;

    /** Redirect-field value that {@link #isRedirect()} treats as "none". */
    private final static String NO_REDIRECT = "-";

    private String url = null;

    private Timestamp timestamp = null;

    private String origHost = null;

    private String mimeType = null;

    private String httpResponseCode = null;

    private String md5Fragment = null;

    private String redirectUrl = null;

    private long compressedOffset = -1;

    private String arcFileName = null;

    public ResourceResult() {
        super();
    }

    /**
     * Builds an {@link ARCLocation} snapshot of the current ARC file name
     * and compressed offset. Later changes to this object do not affect the
     * returned location.
     *
     * @return location whose name is the ARC file name and whose offset is
     *         the compressed offset
     */
    public ARCLocation getARCLocation() {
        final String daArcName = arcFileName;
        final long daOffset = compressedOffset;
        return new ARCLocation() {
            private String filename = daArcName;

            private long offset = daOffset;

            public String getName() {
                return this.filename;
            }

            public long getOffset() {
                return this.offset;
            }
        };
    }

    /**
     * Populates this object from one index line.
     *
     * All fields are parsed before any is assigned, so a malformed line
     * leaves this object unchanged.
     *
     * @param line       space-separated line with exactly nine fields
     * @param lineNumber reported as the error offset of any ParseException
     * @throws ParseException if the field count is wrong, the timestamp
     *                        cannot be parsed, or the offset is not a number
     */
    public void parseLine(final String line, final int lineNumber)
            throws ParseException {
        final String[] tokens = line.split(" ");
        if (tokens.length != CDX_FIELD_COUNT) {
            throw new ParseException(line, lineNumber);
        }

        final Timestamp parsedTimestamp = Timestamp.parseBefore(tokens[1]);
        final long parsedOffset;
        try {
            parsedOffset = Long.parseLong(tokens[7]);
        } catch (NumberFormatException e) {
            // Report a bad offset through the declared checked exception
            // instead of leaking an unchecked one to the caller.
            ParseException pe = new ParseException(line, lineNumber);
            pe.initCause(e);
            throw pe;
        }

        url = tokens[0];
        timestamp = parsedTimestamp;
        origHost = tokens[2];
        mimeType = tokens[3];
        httpResponseCode = tokens[4];
        md5Fragment = tokens[5];
        redirectUrl = tokens[6];
        compressedOffset = parsedOffset;
        arcFileName = tokens[8];
    }

    public static String getCDXHeaderString() {
        return CDX_HEADER_STRING;
    }

    /**
     * @return all nine fields joined by single spaces, in the order
     *         {@link #parseLine} reads them; an unset timestamp prints as
     *         "null" rather than throwing
     */
    public String toString() {
        return url + " " + dateStrOrNull() + " " + origHost + " "
                + mimeType + " " + httpResponseCode + " " + md5Fragment
                + " " + redirectUrl + " " + compressedOffset + " "
                + arcFileName;
    }

    /**
     * @return URL, timestamp, compressed offset and ARC file name joined by
     *         tabs
     */
    public String toShortString() {
        return url + "\t" + dateStrOrNull() + "\t" + compressedOffset + "\t"
                + arcFileName;
    }

    /** Date string of the timestamp, or null when no timestamp is set. */
    private String dateStrOrNull() {
        return (timestamp == null) ? null : timestamp.getDateStr();
    }

    public String getArcFileName() {
        return arcFileName;
    }

    public long getCompressedOffset() {
        return compressedOffset;
    }

    public String getHttpResponseCode() {
        return httpResponseCode;
    }

    public String getMd5Fragment() {
        return md5Fragment;
    }

    public String getMimeType() {
        return mimeType;
    }

    public String getOrigHost() {
        return origHost;
    }

    public String getRedirectUrl() {
        return redirectUrl;
    }

    /**
     * @return true if a redirect URL is set and it is not "-"; false when
     *         the redirect URL is unset
     */
    public boolean isRedirect() {
        return (redirectUrl != null) && !NO_REDIRECT.equals(redirectUrl);
    }

    public Timestamp getTimestamp() {
        return timestamp;
    }

    public String getUrl() {
        return url;
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }

    public void setArcFileName(String arcFileName) {
        this.arcFileName = arcFileName;
    }

    public void setCompressedOffset(long compressedOffset) {
        this.compressedOffset = compressedOffset;
    }

    public void setHttpResponseCode(String httpResponseCode) {
        this.httpResponseCode = httpResponseCode;
    }

    public void setMd5Fragment(String md5Fragment) {
        this.md5Fragment = md5Fragment;
    }

    public void setMimeType(String mimeType) {
        this.mimeType = mimeType;
    }

    public void setOrigHost(String origHost) {
        this.origHost = origHost;
    }

    public void setRedirectUrl(String redirectUrl) {
        this.redirectUrl = redirectUrl;
    }

    public void setTimeStamp(Timestamp timeStamp) {
        this.timestamp = timeStamp;
    }

    public void setUrl(String url) {
        this.url = url;
    }
}

--- NEW FILE: WMRequest.java ---
package org.archive.wayback.core;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.archive.net.UURI;

/**
 * Mutable description of one incoming request: the requested and redirect
 * URIs, the referrer, the exact/start/end timestamps, and which one of the
 * three request types (retrieval, query, path query) is currently selected.
 */
public class WMRequest {
    // Compiled once for the class; a Pattern is immutable, so there is no
    // reason to recompile it for every request instance.
    private static final Pattern IMAGE_REGEX = Pattern
            .compile(".*\\.(jpg|jpeg|gif|png|bmp|tiff|tif)$");

    private String exactDateRequest = null;

    private Timestamp exactTimestamp = null;

    private Timestamp startTimestamp = null;

    private Timestamp endTimestamp = null;

    private String referrerUrl = null;

    private UURI requestURI = null;

    private UURI redirectURI = null;

    private boolean retrieval = false;

    private boolean query = false;

    private boolean pathQuery = false;

    public UURI getRedirectURI() {
        return redirectURI;
    }

    public void setRedirectURI(UURI redirectURI) {
        this.redirectURI = redirectURI;
    }

    public WMRequest() {
        super();
        // TODO Auto-generated constructor stub
    }

    public boolean isRetrieval() {
        return this.retrieval;
    }

    public boolean isQuery() {
        return this.query;
    }

    public boolean isPathQuery() {
        return this.pathQuery;
    }

    public Timestamp getExactTimestamp() {
        return exactTimestamp;
    }

    public void setExactTimestamp(Timestamp exactTimestamp) {
        this.exactTimestamp = exactTimestamp;
    }

    public Timestamp getEndTimestamp() {
        return endTimestamp;
    }

    public void setEndTimestamp(Timestamp endTimestamp) {
        this.endTimestamp = endTimestamp;
    }

    public String getReferrerUrl() {
        return referrerUrl;
    }

    public void setReferrerUrl(String referrerUrl) {
        this.referrerUrl = referrerUrl;
    }

    public UURI getRequestURI() {
        return requestURI;
    }

    public void setRequestURI(UURI requestURI) {
        this.requestURI = requestURI;
    }

    public Timestamp getStartTimestamp() {
        return startTimestamp;
    }

    public void setStartTimestamp(Timestamp startTimestamp) {
        this.startTimestamp = startTimestamp;
    }

    /** Clears all three type flags so that exactly one can then be set. */
    private void resetType() {
        this.retrieval = false;
        this.query = false;
        this.pathQuery = false;
    }

    /** Marks this request as a path query, clearing the other two types. */
    public void setPathQuery() {
        resetType();
        this.pathQuery = true;
    }

    /** Marks this request as a query, clearing the other two types. */
    public void setQuery() {
        resetType();
        this.query = true;
    }

    /** Marks this request as a retrieval, clearing the other two types. */
    public void setRetrieval() {
        resetType();
        this.retrieval = true;
    }

    /**
     * Tests whether the escaped request URI ends in one of the lower-case
     * image extensions jpg, jpeg, gif, png, bmp, tiff or tif. Only the URI
     * string is inspected; the request type flags are not consulted.
     *
     * @return true if the request URI matches; false if it does not or if no
     *         request URI has been set
     */
    public boolean isImageRetrieval() {
        if (requestURI == null) {
            return false;
        }
        final String uri = requestURI.getEscapedURI();
        if (uri == null) {
            return false;
        }
        final Matcher matcher = IMAGE_REGEX.matcher(uri);
        return matcher.matches();
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }

    public String getExactDateRequest() {
        return exactDateRequest;
    }

    public void setExactDateRequest(String exactDateRequest) {
        this.exactDateRequest = exactDateRequest;
    }
}

--- NEW FILE: Resource.java ---
/**
 *
 */
package org.archive.wayback.core;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.archive.io.arc.ARCRecord;

/**
 * Thin wrapper around a single {@link ARCRecord}.
 *
 * @author brad
 */
public class Resource {
    ARCRecord arcRecord = null;

    // probably this should inherit from ARCRecord...
    public Resource(final ARCRecord rec) {
        super();
        arcRecord = rec;
    }

    /**
     * Skips the record's HTTP header, then reads the rest of the record and
     * returns it as a String decoded with the platform default charset.
     *
     * The bytes are collected first and decoded once at the end. Decoding
     * each 8 KiB chunk separately would corrupt any multi-byte character
     * that straddles a chunk boundary, and repeated String concatenation
     * costs quadratic time on large records.
     *
     * @return the record content that follows the HTTP header
     * @throws IOException if reading the underlying record fails
     */
    public String dumpRaw() throws IOException {
        arcRecord.skipHttpHeader();

        final ByteArrayOutputStream content = new ByteArrayOutputStream();
        final byte[] buffer = new byte[8 * 1024];
        int read;
        while ((read = arcRecord.read(buffer, 0, buffer.length)) != -1) {
            content.write(buffer, 0, read);
        }
        return content.toString();
    }

    public ARCRecord getArcRecord() {
        return arcRecord;
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }
}

--- NEW FILE: Timestamp.java ---
package org.archive.wayback.core;

import java.text.ParseException;
import java.util.Date;

import org.archive.util.ArchiveUtils;

/**
 * A point in time held both as a 14-digit date string (yyyyMMddHHmmss, as
 * the substring offsets in the accessors below assume) and as the
 * {@link Date} that ArchiveUtils parses from that string.
 */
public class Timestamp {
    private final static String FIRST1_TIMESTAMP = "19960101000000";

    private final static String FIRST2_TIMESTAMP = "20000101000000";

    private final static String LAST1_TIMESTAMP = "19991231235959";

    // private final static String LAST2_TIMESTAMP = "20311231235959";
    private final static String LAST2_TIMESTAMP = "29991231235959";

    private final static String[] months = { "Jan", "Feb", "Mar", "Apr",
            "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };

    private String dateStr = null;

    private Date date = null;

    public Timestamp() {
        super();
        // TODO Auto-generated constructor stub
    }

    /**
     * Parses a possibly partial date string, filling the missing trailing
     * digits from the earliest-timestamp template.
     *
     * @param dateStr leading digits of a 14-digit date; may be empty
     * @return the padded timestamp
     * @throws ParseException if the padded string cannot be parsed
     */
    public static Timestamp parseBefore(final String dateStr)
            throws ParseException {
        Timestamp ts = new Timestamp();
        ts.setDateStr(padStartTimestamp(dateStr));
        return ts;
    }

    /**
     * Parses a possibly partial date string, filling the missing trailing
     * digits from the latest-timestamp template.
     *
     * @param dateStr leading digits of a 14-digit date; may be empty
     * @return the padded timestamp
     * @throws ParseException if the padded string cannot be parsed
     */
    public static Timestamp parseAfter(final String dateStr)
            throws ParseException {
        Timestamp ts = new Timestamp();
        ts.setDateStr(padEndTimestamp(dateStr));
        return ts;
    }

    /** @return timestamp for 19960101000000 */
    public static Timestamp earliestTimestamp() throws ParseException {
        Timestamp ts = new Timestamp();
        ts.setDateStr(FIRST1_TIMESTAMP);
        return ts;
    }

    /** @return timestamp for 29991231235959 */
    public static Timestamp latestTimestamp() throws ParseException {
        Timestamp ts = new Timestamp();
        ts.setDateStr(LAST2_TIMESTAMP);
        return ts;
    }

    /**
     * Builds a timestamp from seconds since the epoch.
     *
     * @param sse seconds since the epoch
     * @return the corresponding timestamp
     * @throws ParseException if the generated date string cannot be parsed
     */
    public static Timestamp fromSse(final int sse) throws ParseException {
        // Multiply as long: int arithmetic overflows once sse exceeds
        // Integer.MAX_VALUE / 1000 (late January 1970).
        String dateStr = ArchiveUtils.get14DigitDate(sse * 1000L);
        Timestamp ts = new Timestamp();
        ts.setDateStr(dateStr);
        return ts;
    }

    /**
     * Pads a partial date with the tail of an earliest-date template. Inputs
     * shorter than four digits that start with '2' use the year-2000
     * template; everything else uses the 1996 template.
     */
    private static String padStartTimestamp(final String input) {
        String first = FIRST1_TIMESTAMP;
        if (input.length() == 0) {
            return FIRST1_TIMESTAMP;
        }
        if (input.length() < 4) {
            if (input.charAt(0) == '2') {
                first = FIRST2_TIMESTAMP;
            }
        }
        return padTimestamp(input, first);
    }

    /**
     * Pads a partial date with the tail of a latest-date template. Empty
     * input, and inputs shorter than four digits that start with '2', use
     * the year-2999 template; everything else uses the 1999 template.
     */
    private static String padEndTimestamp(final String input) {
        String last = LAST1_TIMESTAMP;
        if (input.length() == 0) {
            return LAST2_TIMESTAMP;
        }
        if (input.length() < 4) {
            if (input.charAt(0) == '2') {
                last = LAST2_TIMESTAMP;
            }
        }
        return padTimestamp(input, last);
    }

    /**
     * Appends to input the characters of output that lie beyond input's
     * length. Input longer than output is returned unchanged.
     */
    private static String padTimestamp(final String input, final String output) {
        if (input.length() > output.length()) {
            return input;
        }
        return input + output.substring(input.length());
    }

    public String getDateStr() {
        return dateStr;
    }

    /**
     * Sets both the string and the parsed Date. The string is stored only
     * after parsing succeeds, so a failure leaves this object unchanged.
     *
     * @param dateStr 14-digit date string
     * @throws ParseException if ArchiveUtils cannot parse the string
     */
    public void setDateStr(String dateStr) throws ParseException {
        date = ArchiveUtils.parse14DigitDate(dateStr);
        this.dateStr = dateStr;
    }

    /**
     * @return whole seconds since the epoch for this timestamp
     */
    public int sse() {
        // Plain integer division. Math.round(long) would resolve to
        // Math.round(float), and a float cannot represent present-day epoch
        // seconds exactly, so the result would be off by up to a minute.
        return (int) (date.getTime() / 1000);
    }

    /**
     * Calculates the absolute number of milliseconds between this timestamp
     * and the argument.
     *
     * @param otherTimeStamp the timestamp to compare against
     * @return non-negative difference in milliseconds
     * @throws ParseException if either date string cannot be parsed
     */
    public long absDistanceFromTimestamp(final Timestamp otherTimeStamp)
            throws ParseException {
        return Math.abs(distanceFromTimestamp(otherTimeStamp));
    }

    /**
     * Calculates the signed number of milliseconds from this timestamp to
     * the argument: positive if the argument is later than this timestamp,
     * negative if it is earlier, and 0 if they are the same.
     *
     * @param otherTimeStamp the timestamp to compare against
     * @return the argument's time minus this timestamp's time, in
     *         milliseconds
     * @throws ParseException if either date string cannot be parsed
     */
    public long distanceFromTimestamp(final Timestamp otherTimeStamp)
            throws ParseException {
        Date myDate = ArchiveUtils.parse14DigitDate(dateStr);
        Date otherDate = ArchiveUtils.parse14DigitDate(otherTimeStamp
                .getDateStr());
        return otherDate.getTime() - myDate.getTime();
    }

    public String getYear() {
        return this.dateStr.substring(0, 4);
    }

    public String getMonth() {
        return this.dateStr.substring(4, 6);
    }

    public String getDay() {
        return this.dateStr.substring(6, 8);
    }

    /**
     * @return the date as "Mon dd, yyyy"; the month prints as "UNK" when the
     *         month digits are outside 01-12
     */
    public String prettyDate() {
        String year = dateStr.substring(0, 4);
        String month = dateStr.substring(4, 6);
        String day = dateStr.substring(6, 8);
        int monthInt = Integer.parseInt(month) - 1;
        String prettyMonth = "UNK";
        if ((monthInt >= 0) && (monthInt < months.length)) {
            prettyMonth = months[monthInt];
        }
        return prettyMonth + " " + day + ", " + year;
    }

    /** @return the time of day as "HH:mm:ss" */
    public String prettyTime() {
        return dateStr.substring(8, 10) + ":" + dateStr.substring(10, 12)
                + ":" + dateStr.substring(12, 14);
    }

    public String prettyDateTime() {
        return prettyDate() + " " + prettyTime();
    }

    public Timestamp startOfYear() throws ParseException {
        return parseBefore(dateStr.substring(0, 4));
    }

    public Timestamp startOfMonth() throws ParseException {
        return parseBefore(dateStr.substring(0, 6));
    }

    /**
     * Returns the start of the seven-day bucket, within the month, that
     * contains this timestamp. Buckets start on days 1, 7, 14, 21 and 28.
     *
     * @return timestamp at the first instant of the bucket's first day
     * @throws ParseException if the derived date string cannot be parsed
     */
    public Timestamp startOfWeek() throws ParseException {
        String yearMonth = dateStr.substring(0, 6);
        int dom = Integer.parseInt(dateStr.substring(6, 8));
        int weekStart = dom - (dom % 7);
        if (weekStart < 1) {
            // Days 1-6 would otherwise map to the non-existent day "00".
            weekStart = 1;
        }
        String paddedDay = (weekStart < 10) ? "0" + weekStart : ""
                + weekStart;
        return parseBefore(yearMonth + paddedDay);
    }

    public Timestamp startOfDay() throws ParseException {
        return parseBefore(dateStr.substring(0, 8));
    }

    public Timestamp startOfHour() throws ParseException {
        return parseBefore(dateStr.substring(0, 10));
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }
}
//public Date getDate() {
//String[] ids = TimeZone.getAvailableIDs(0);
//if(ids.length < 1) {
//  return null;
//}
//TimeZone gmt = new SimpleTimeZone(0,ids[0]);
//Calendar cal = new GregorianCalendar(gmt);
//int year = Integer.parseInt(dateStr.substring(0,4));
//int month = Integer.parseInt(dateStr.substring(4,2)) - 1;
//int day = Integer.parseInt(dateStr.substring(6,2));
//int hour = Integer.parseInt(dateStr.substring(8,2));
//int min = Integer.parseInt(dateStr.substring(10,2));
//int sec = Integer.parseInt(dateStr.substring(12,2));
//
//cal.set(year,month,day,hour,min,sec);
//return cal.getTime();
//}
//

--- NEW FILE: WaybackLogic.java ---
package org.archive.wayback.core;

import java.util.Properties;
import java.util.logging.Logger;

import org.archive.wayback.QueryUI;
import org.archive.wayback.ReplayUI;
import org.archive.wayback.ResourceIndex;
import org.archive.wayback.ResourceStore;

/**
 * Instantiates and initializes the four pluggable components (replay UI,
 * query UI, resource store, resource index) whose class names are read from
 * configuration properties.
 */
public class WaybackLogic {
    private static final Logger LOGGER = Logger.getLogger(WaybackLogic.class
            .getName());

    private static final String REPLAY_UI_CLASS = "replayui.class";

    private static final String QUERY_UI_CLASS = "queryui.class";

    private static final String RESOURCE_STORE_CLASS = "resourcestore.class";

    private static final String RESOURCE_INDEX_CLASS = "resourceindex.class";

    private ReplayUI replayUI = null;

    private QueryUI queryUI = null;

    private ResourceIndex resourceIndex = null;

    private ResourceStore resourceStore = null;

    public WaybackLogic() {
        super();
        // TODO Auto-generated constructor stub
    }

    /**
     * Creates all four components from their configured class names, then
     * calls init(p) on each in the order replay UI, query UI, resource
     * store, resource index.
     *
     * @param p configuration holding the four "*.class" properties; it is
     *          also passed to every component's init
     * @throws Exception if a class name is missing, a class cannot be
     *                   instantiated, or a component's init fails; the
     *                   original failure is attached as the cause
     */
    public void init(Properties p) throws Exception {
        LOGGER.info("WaybackLogic constructing classes...");

        replayUI = (ReplayUI) getInstance(p, REPLAY_UI_CLASS, "replayui");
        queryUI = (QueryUI) getInstance(p, QUERY_UI_CLASS, "queryUI");
        resourceStore = (ResourceStore) getInstance(p, RESOURCE_STORE_CLASS,
                "resourceStore");
        resourceIndex = (ResourceIndex) getInstance(p, RESOURCE_INDEX_CLASS,
                "resourceIndex");

        LOGGER.info("WaybackLogic initializing classes...");

        try {
            replayUI.init(p);
            LOGGER.info("initialized replayUI");
            queryUI.init(p);
            LOGGER.info("initialized queryUI");
            resourceStore.init(p);
            LOGGER.info("initialized resourceStore");
            resourceIndex.init(p);
            LOGGER.info("initialized resourceIndex");
        } catch (Exception e) {
            // Same message as before, but keep the cause so the failing
            // component's stack trace is not lost.
            throw new Exception(e.getMessage(), e);
        }
    }

    /**
     * Instantiates, via its no-argument constructor, the class named by a
     * configuration property.
     *
     * @param p             configuration to read
     * @param classProperty name of the property holding the class name
     * @param pretty        human-readable component name for messages
     * @return the new instance
     * @throws Exception if the property is missing or empty, or the class
     *                   cannot be loaded or instantiated (cause attached)
     */
    protected Object getInstance(final Properties p,
            final String classProperty, final String pretty) throws Exception {
        Object result = null;

        // getProperty honours Properties defaults and yields null, not a
        // ClassCastException, when the stored value is not a String.
        String className = p.getProperty(classProperty);
        if ((className == null) || (className.length() <= 0)) {
            throw new Exception("No config (" + classProperty + " for "
                    + pretty + ")");
        }

        try {
            result = Class.forName(className).newInstance();
            LOGGER.info("new " + className + " " + pretty + " created.");
        } catch (Exception e) {
            // Convert. Add info. Keep the cause.
            throw new Exception("Failed making " + pretty + " with "
                    + className + ": " + e.getMessage(), e);
        }
        return result;
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }

    public QueryUI getQueryUI() {
        return queryUI;
    }

    public ReplayUI getReplayUI() {
        return replayUI;
    }

    public ResourceIndex getResourceIndex() {
        return resourceIndex;
    }

    public ResourceStore getResourceStore() {
        return resourceStore;
    }
}

--- NEW FILE: ResourceResults.java ---
package org.archive.wayback.core;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;

import org.archive.wayback.core.ResourceResult;
import org.archive.wayback.core.WMRequest;

/**
 * Insertion-ordered list of {@link ResourceResult} objects with a few
 * timestamp-based lookups.
 */
public class ResourceResults {
    ArrayList results = null;

    public ResourceResults() {
        super();
        this.results = new ArrayList();
    }

    public boolean isEmpty() {
        return results.isEmpty();
    }

    public void addResourceResult(final ResourceResult result) {
        results.add(result);
    }

    public int getNumResults() {
        return results.size();
    }

    /**
     * Finds the result whose timestamp is nearest, in absolute time, to the
     * request's exact timestamp. On a tie the earliest-added result wins.
     *
     * @param wmRequest request supplying the wanted exact timestamp
     * @return the closest result, or null if there are no results or none
     *         could be compared
     */
    public ResourceResult getClosest(final WMRequest wmRequest) {
        ResourceResult closest = null;
        long closestDistance = 0;
        ResourceResult cur = null;
        Timestamp wantTimestamp = wmRequest.getExactTimestamp();

        Iterator itr = results.iterator();
        while (itr.hasNext()) {
            cur = (ResourceResult) itr.next();
            long curDistance;
            try {
                curDistance = cur.getTimestamp().absDistanceFromTimestamp(
                        wantTimestamp);
            } catch (ParseException e) {
                // Best effort: a result whose date cannot be parsed is
                // skipped rather than failing the whole lookup.
                continue;
            }
            if ((closest == null) || (curDistance < closestDistance)) {
                closest = cur;
                closestDistance = curDistance;
            }
        }
        return closest;
    }

    public Iterator iterator() {
        return results.iterator();
    }

    /**
     * Lists the year of each result, collapsing runs of consecutive results
     * that share a year. The years are distinct only if results were added
     * in timestamp order.
     *
     * @return list of four-character year Strings, in result order
     */
    public ArrayList getYears() {
        ArrayList years = new ArrayList();
        String lastYear = "";
        Iterator itr = results.iterator();
        while (itr.hasNext()) {
            ResourceResult cur = (ResourceResult) itr.next();
            String curYear = cur.getTimestamp().getYear();
            if (!curYear.equals(lastYear)) {
                years.add(curYear);
                lastYear = curYear;
            }
        }
        return years;
    }

    /**
     * @param year four-character year to select
     * @return the results whose timestamp falls in that year, in their
     *         original order; empty if there are none
     */
    public ArrayList resultsInYear(String year) {
        ArrayList resultsToReturn = new ArrayList();
        Iterator itr = results.iterator();
        while (itr.hasNext()) {
            ResourceResult cur = (ResourceResult) itr.next();
            if (cur.getTimestamp().getYear().equals(year)) {
                resultsToReturn.add(cur);
            }
        }
        return resultsToReturn;
    }

    /**
     * @return timestamp of the first-added result, or null if empty
     */
    public Timestamp firstTimestamp() {
        if (results.isEmpty()) {
            return null;
        }
        ResourceResult first = (ResourceResult) results.get(0);
        return first.getTimestamp();
    }

    /**
     * @return timestamp of the last-added result, or null if empty
     */
    public Timestamp lastTimestamp() {
        if (results.isEmpty()) {
            return null;
        }
        ResourceResult last = (ResourceResult) results.get(results.size() - 1);
        return last.getTimestamp();
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }
}
|