From: Brad <bra...@us...> - 2005-10-19 01:22:45
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/arcindexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv901/src/java/org/archive/wayback/arcindexer Modified Files: IndexPipeline.java ArcIndexer.java Log Message: lots of javadoc comments Index: ArcIndexer.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/arcindexer/ArcIndexer.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** ArcIndexer.java 18 Oct 2005 02:30:49 -0000 1.1 --- ArcIndexer.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* ArcIndexer + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.arcindexer; *************** *** 17,28 **** import org.apache.commons.httpclient.Header; public class ArcIndexer { private final static String LOCATION_HTTP_HEADER = "Location"; public ArcIndexer() { super(); - // TODO Auto-generated constructor stub } public ResourceResults indexArc(final String arcPath) throws IOException { ResourceResults results = new ResourceResults(); --- 40,67 ---- import org.apache.commons.httpclient.Header; + /** + * Transforms an ARC file into ResourceResults, or a serialized ResourceResults + * file(CDX). + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class ArcIndexer { private final static String LOCATION_HTTP_HEADER = "Location"; + /** + * Constructor + */ public ArcIndexer() { super(); } + /** + * Create a ResourceResults representing the records in ARC file at arcPath. + * + * @param arcPath + * @return ResourceResults in arcPath. + * @throws IOException + */ public ResourceResults indexArc(final String arcPath) throws IOException { ResourceResults results = new ResourceResults(); *************** *** 31,35 **** arcReader.setParseHttpHeaders(true); // doh. this does not generate quite the columns we need: ! //arcReader.createCDXIndexFile(arcPath); Iterator itr = arcReader.iterator(); while (itr.hasNext()) { --- 70,74 ---- arcReader.setParseHttpHeaders(true); // doh. this does not generate quite the columns we need: ! // arcReader.createCDXIndexFile(arcPath); Iterator itr = arcReader.iterator(); while (itr.hasNext()) { *************** *** 39,47 **** result = arcRecordToResourceResult(rec, arc); } catch (NullPointerException e) { - // TODO Auto-generated catch block e.printStackTrace(); continue; } catch (ParseException e) { - // TODO Auto-generated catch block e.printStackTrace(); continue; --- 78,84 ---- *************** *** 81,85 **** } result.setRedirectUrl(redirectUrl); ! result.setTimeStamp(Timestamp.parseBefore(meta.getDate())); UURI uriCap = new UURI(meta.getUrl(), false); String searchHost = uriCap.getHostBasename(); --- 118,122 ---- } result.setRedirectUrl(redirectUrl); ! result.setTimestamp(Timestamp.parseBefore(meta.getDate())); UURI uriCap = new UURI(meta.getUrl(), false); String searchHost = uriCap.getHostBasename(); *************** *** 92,95 **** --- 129,139 ---- } + /** + * Write out ResourceResults into CDX file at cdxPath + * + * @param results + * @param cdxPath + * @throws IOException + */ public void serializeResults(final ResourceResults results, final String cdxPath) throws IOException { *************** *** 108,112 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub ArcIndexer indexer = new ArcIndexer(); String arc = args[0]; --- 152,155 ---- Index: IndexPipeline.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/arcindexer/IndexPipeline.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** IndexPipeline.java 18 Oct 2005 02:30:49 -0000 1.1 --- IndexPipeline.java 19 Oct 2005 01:22:36 -0000 1.2 *************** *** 1,2 **** --- 1,25 ---- + /* IndexPipeline + * + * Created on 2005/10/18 14:00:00 + * + * Copyright (C) 2005 Internet Archive. + * + * This file is part of the Wayback Machine (crawler.archive.org). + * + * Wayback Machine is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback Machine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback Machine; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + package org.archive.wayback.arcindexer; *************** *** 11,18 **** import com.sun.org.apache.xml.internal.utils.StringToStringTable; public class IndexPipeline { private File arcDir = null; ! public File mergeDir = null; private File queuedDir = null; --- 34,48 ---- import com.sun.org.apache.xml.internal.utils.StringToStringTable; + /** + * Implements updating of a BDBResourceIndex using several directories with data + * files or flag files. + * + * @author Brad Tofel + * @version $Date$, $Revision$ + */ public class IndexPipeline { private File arcDir = null; ! private File mergeDir = null; private File queuedDir = null; *************** *** 24,30 **** private ArcIndexer indexer = null; public IndexPipeline() { super(); - // TODO Auto-generated constructor stub } --- 54,62 ---- private ArcIndexer indexer = null; + /** + * Constructor + */ public IndexPipeline() { super(); } *************** *** 35,38 **** --- 67,78 ---- } + /** + * Initialize this object from several path arguments. + * + * @param arcDir + * @param mergeDir + * @param workDir + * @throws IOException + */ public void init(final String arcDir, final String mergeDir, final String workDir) throws IOException { *************** *** 85,88 **** --- 125,133 ---- } + /** + * Find all new ARC files, and queue them for indexing. + * + * @throws IOException + */ public void queueNewArcs() throws IOException { ArrayList newArcs = getNewArcs(); *************** *** 96,99 **** --- 141,151 ---- } + /** + * Index any ARC files queued for indexing, queueing the resulting CDX files + * for merging with the BDBResourceIndex. + * + * @throws MalformedURLException + * @throws IOException + */ public void indexArcs() throws MalformedURLException, IOException { queueNewArcs(); *************** *** 130,136 **** */ public static void main(String[] args) { - // TODO Auto-generated method stub } } --- 182,194 ---- */ public static void main(String[] args) { } + /** + * @return Returns the mergeDir. + */ + public File getMergeDir() { + return mergeDir; + } + } |