You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
From: Sverre B. <sv...@us...> - 2005-10-10 13:11:41
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12882 Modified Files: documentDispatcher.php Log Message: Fixed bug 1322594 Index: documentDispatcher.php =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/documentDispatcher.php,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** documentDispatcher.php 5 Oct 2005 22:42:47 -0000 1.4 --- documentDispatcher.php 10 Oct 2005 13:11:37 -0000 1.5 *************** *** 143,147 **** $searchEngine = new $conf_index_class(); $locator = new documentLocator(); ! $locator->initialize($searchEngine, $url, false, $timestamp, 'NEAR'); $numhits = $locator->findVersions(); if($numhits <= 0) { // No document found --- 143,154 ---- $searchEngine = new $conf_index_class(); $locator = new documentLocator(); ! if ($timestamp != "") { ! $doclocmode = 'NEAR'; ! } ! else { // if no time given show the latest version ! $doclocmode = 'LAST'; ! } ! ! $locator->initialize($searchEngine, $url, false, $timestamp, $doclocmode); $numhits = $locator->findVersions(); if($numhits <= 0) { // No document found |
From: Sverre B. <sv...@us...> - 2005-10-10 11:23:50
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20658/lib Modified Files: meta.inc Log Message: Index: meta.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/meta.inc,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** meta.inc 5 Oct 2005 22:43:56 -0000 1.1 --- meta.inc 10 Oct 2005 11:23:40 -0000 1.2 *************** *** 56,59 **** --- 56,60 ---- "content_checksum" => false, "http-header" => false, + "errormessage" => false ); *************** *** 65,69 **** */ function metaParser($aid) { ! include ("../lib/config.inc"); $this->aid = $aid; $this->retriever_url = $document_retriever . "?reqtype=getmeta&aid=" . $this->aid."&reqtype=getmeta"; --- 66,70 ---- */ function metaParser($aid) { ! include ("config.inc"); $this->aid = $aid; $this->retriever_url = $document_retriever . "?reqtype=getmeta&aid=" . $this->aid."&reqtype=getmeta"; *************** *** 115,118 **** --- 116,124 ---- $this->errormsg = "Error : Failed to open stream!"; } + + if ($this->metadata['errormessage']) { + $this->errormsg = "Error from retriever : <a href=\"" . $this->retriever_url . "\">" . $this->metadata['errormessage'] . 
"</a>"; + $retval = false; + } return $retval; } *************** *** 149,160 **** $this->xml_parser_in["filestatus_long"] = true; } - elseif ($name == "CONTENT_CHECKSUM") { $this->xml_parser_in["content_checksum"] = true; } - elseif ($name == "HTTP-HEADER") { $this->xml_parser_in["http-header"] = true; } } --- 155,168 ---- $this->xml_parser_in["filestatus_long"] = true; } elseif ($name == "CONTENT_CHECKSUM") { $this->xml_parser_in["content_checksum"] = true; } elseif ($name == "HTTP-HEADER") { $this->xml_parser_in["http-header"] = true; } + elseif ($name == "ERRORMESSAGE") { + $this->xml_parser_in["errormessage"] = true; + } + } *************** *** 195,199 **** --- 203,211 ---- elseif ($name == "HTTP-HEADER") { $this->xml_parser_in["http-header"] = false; + } + elseif ($name == "ERRORMESSAGE") { + $this->xml_parser_in["errormessage"] = true; } + } *************** *** 236,240 **** elseif ($this->xml_parser_in["http-header"]) { $this->metadata['http-header'] .= $data; ! } } } --- 248,255 ---- elseif ($this->xml_parser_in["http-header"]) { $this->metadata['http-header'] .= $data; ! } ! elseif ($this->xml_parser_in["errormessage"]) { ! $this->metadata['errormessage'] .= $data; ! } } } |
From: Michael S. <sta...@us...> - 2005-10-07 16:40:00
|
Update of /cvsroot/archive-access/archive-access/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv2073/xdocs Modified Files: index.xml Log Message: * xdocs/index.xml Fix link. Index: index.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/xdocs/index.xml,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** index.xml 4 Oct 2005 23:25:51 -0000 1.21 --- index.xml 7 Oct 2005 16:39:51 -0000 1.22 *************** *** 47,57 **** <a href="http://cvs.sourceforge.net/viewcvs.py/archive-access/archive-access/projects/hedaern/docs/guide.pdf?rev=1">guide</a>.</li> ! <li><a href="/projects/wera/">wera</a> is an archive viewer application that gives an Internet Archive Wayback Machine-like ! access to web archive collections. Wera is php5 application based ! on -- and replacing -- the ! <a href="http://nwa.nb.no/">NwaToolset</a>. ! Uses <a href="/projects/nutch">Nutchwax</a> as its search engine ! core and the ARCRetriever fetching records from ARCs. </li> --- 47,59 ---- <a href="http://cvs.sourceforge.net/viewcvs.py/archive-access/archive-access/projects/hedaern/docs/guide.pdf?rev=1">guide</a>.</li> ! <li> ! <a href="/projects/wera/">wera</a> is an archive <i>viewer</i> application that gives an Internet Archive Wayback Machine-like ! access to web archive collections. Wera is a php5 application based ! on -- and replaces -- ! the <a href="http://nwa.nb.no/">NwaToolset</a>. Currently wera ! uses <a href="/projects/nutch">Nutchwax</a> as its search engine ! core and the ARCRetriever webpp (included) fetching records from ! ARCs. </li> |
From: Michael S. <sta...@us...> - 2005-10-06 23:34:57
|
Update of /cvsroot/archive-access/archive-access/projects/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22683 Modified Files: README.txt Log Message: * README.txt Point to the manual. It has it all. Index: README.txt =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/README.txt,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** README.txt 5 Oct 2005 00:06:30 -0000 1.2 --- README.txt 6 Oct 2005 23:34:49 -0000 1.3 *************** *** 1,9 **** $Id$ ! TODO ! ! ON BUILDING WERA ! There is nothing to build in wera. You just copy the src/webapps/wera directory ! under your suitably configured apache htdocs directory. Though wera project ! is php, not java, wera uses maven to build the wera website, create the wera ! binary and src bundles, and generating the docbook manual at build time. --- 1,5 ---- $Id$ ! For requirements, installation instructions and overview, see the Wera Manual ! locally at docs/articles/manual.html or at ! http://archive-access.sourceforge.net/projects/wera/articles/manual.html |
From: Michael S. <sta...@us...> - 2005-10-06 23:16:36
|
Update of /cvsroot/archive-access/archive-access/projects/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17157 Modified Files: project.xml Log Message: * project.xml Add link to iipc. Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/project.xml,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** project.xml 6 Oct 2005 23:15:02 -0000 1.8 --- project.xml 6 Oct 2005 23:16:29 -0000 1.9 *************** *** 17,21 **** <organization> <name >IIPC</name> ! <url >http://archive-access.sourceforge.net/projects/wera/</url> <logo>/images/iipc.gif</logo> </organization> --- 17,21 ---- <organization> <name >IIPC</name> ! <url >http://www.netpreserve.org/</url> <logo>/images/iipc.gif</logo> </organization> |
From: Michael S. <sta...@us...> - 2005-10-06 23:15:10
|
Update of /cvsroot/archive-access/archive-access/projects/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16599 Modified Files: project.xml Log Message: * project.xml * src/images/iipc.gif Add mention of iipc gif. Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/project.xml,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** project.xml 6 Oct 2005 22:32:45 -0000 1.7 --- project.xml 6 Oct 2005 23:15:02 -0000 1.8 *************** *** 16,22 **** <!-- details about the organization that 'owns' the project --> <organization> ! <name >NWA</name> <url >http://archive-access.sourceforge.net/projects/wera/</url> ! <logo>/images/nwa.jpg</logo> </organization> --- 16,22 ---- <!-- details about the organization that 'owns' the project --> <organization> ! <name >IIPC</name> <url >http://archive-access.sourceforge.net/projects/wera/</url> ! <logo>/images/iipc.gif</logo> </organization> *************** *** 24,28 **** <inceptionYear>2005</inceptionYear> <package>no.nb.nwa</package> ! <logo /> <description>WERA (Web ARchive Access) is a freely available solution for searching and navigating archived web document collections. It works --- 24,28 ---- <inceptionYear>2005</inceptionYear> <package>no.nb.nwa</package> ! <logo>/images/nwa.jpg</logo> <description>WERA (Web ARchive Access) is a freely available solution for searching and navigating archived web document collections. It works |
From: Michael S. <sta...@us...> - 2005-10-06 23:15:10
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/images In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16599/src/images Added Files: iipc.gif Log Message: * project.xml * src/images/iipc.gif Add mention of iipc gif. --- NEW FILE: iipc.gif --- (This appears to be a binary file; contents omitted.) |
From: Michael S. <sta...@us...> - 2005-10-06 22:32:57
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/articles In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6029/src/articles Modified Files: manual.xml Log Message: * maven.xml * project.properties * project.xml * src/articles/manual.xml * xdocs/navigation.xml * xdocs/requirements.xml Updated text around wera. Removed stuff like requirements. The manual has it all. Index: manual.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/articles/manual.xml,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** manual.xml 4 Oct 2005 22:59:26 -0000 1.1 --- manual.xml 6 Oct 2005 22:32:46 -0000 1.2 *************** *** 73,78 **** <listitem> <para>A Search Engine which holds a full-text index of the archived ! web documents. Currently the NutchWAX search engine is ! supported.</para> </listitem> --- 73,79 ---- <listitem> <para>A Search Engine which holds a full-text index of the archived ! web documents. Currently the <ulink ! url="http://archive-access.sourceforge.net/projects/nutch/">NutchWAX</ulink> ! search engine is supported.</para> </listitem> *************** *** 80,84 **** <para>A Document Retriever which serves as the interface between the Access module and the web archive. The Document Retriever delivers ! archived files and associated metadata to WERA upon request.</para> </listitem> </itemizedlist> --- 81,87 ---- <para>A Document Retriever which serves as the interface between the Access module and the web archive. The Document Retriever delivers ! archived files and associated metadata to WERA upon request ! (WERA is bundled with a simple webapp that knows how to fetch ! records from a directory of Internet Archive ARC files).</para> </listitem> </itemizedlist> *************** *** 92,99 **** <title>NutchWAX</title> ! <para>Currently the Jakarta Lucene based NutchWAX search engine is supported. 
WERA must (at the moment) be downloaded and installed ! separately. See http://archive-access.sourceforge.net/projects/nutch/ ! for further information.</para> </section> --- 95,103 ---- <title>NutchWAX</title> ! <para>Currently the Jakarta Nutch/Lucene based NutchWAX search engine is supported. WERA must (at the moment) be downloaded and installed ! separately. See <ulink ! url="http://archive-access.sourceforge.net/projects/nutch/">NutchWax ! site</ulink> for further information.</para> </section> *************** *** 200,206 **** <title>Obtaining WERA</title> ! <para>The latest version of WERA may be downloaded from WERA <ulink ! url="http://nwatoolset.sourceforge.net">home page</ulink> at ! sourceforge.</para> </section> --- 204,211 ---- <title>Obtaining WERA</title> ! <para>The latest version of WERA may be downloaded from the ! archive-access <ulink ! url="http://sourceforge.net/project/showfiles.php?group_id=118427">files ! pages</ulink> at sourceforge.</para> </section> *************** *** 214,218 **** <title>System Requirements</title> ! <para>WERA and the NWA Adapted Lucene search engine has been tested on different builds of <emphasis>RedHat</emphasis> (7.3, 8, AS2 etc.), <emphasis>Fedora</emphasis> and <emphasis>Suse</emphasis> Linux. There --- 219,223 ---- <title>System Requirements</title> ! <para>WERA has been tested on different builds of <emphasis>RedHat</emphasis> (7.3, 8, AS2 etc.), <emphasis>Fedora</emphasis> and <emphasis>Suse</emphasis> Linux. There *************** *** 253,257 **** <para>Tomcat servlet container (http://jakarta.apache.org/tomcat/index.html). The ArcRetriever ! web app has been tested on v.5.0.27 and 5.0.28</para> </listitem> --- 258,263 ---- <para>Tomcat servlet container (http://jakarta.apache.org/tomcat/index.html). The ArcRetriever ! web app has been tested on v.5.0.27 and 5.0.28 as well as in ! 
5.5.9.</para> </listitem> *************** *** 267,270 **** --- 273,281 ---- <title>Java Based Installer</title> + <note> + <para>The java-based installer is momentarily unavailable. Will + be fixed in upcoming release. + </para> + </note> <para>To install WERA do the following:</para> *************** *** 309,330 **** <itemizedlist> <listitem> ! <para>Download wera-x-y-z-manual-install.tar.gz from ! sourceforge.</para> </listitem> <listitem> ! <para>Unpack the gzipped tarball into the Apache document root ! directory on the host where you want WERA installed.</para> </listitem> <listitem> <para>Move the file ArcRetriever.war from ! <apcheWebRootDir>/wera/ to the webapps directory of the ! tomcat installation of the host where your ARC-files ! recide.</para> </listitem> <listitem> ! <para>Edit the file <apcheWebRootDir>/wera/lib/config.inc (see below for details).</para> </listitem> --- 320,343 ---- <itemizedlist> <listitem> ! <para>Download wera-x-y-z.tar.gz from sourceforge. ! Untar and gunzip the bundle. Let the resultant directory ! be WERA_HOME (e.g. wera-x-y-z).</para> </listitem> <listitem> ! <para>Move <filename>$WERA_HOME/webapps/wera</filename> into the ! Apache document root directory -- HTDOCS -- on the host where you ! want the WERA application to run.</para> </listitem> <listitem> <para>Move the file ArcRetriever.war from ! <filename>$WERA_HOME/webapps/wera</filename> to the webapps ! directory of the tomcat installation of the host where your ! ARC-files reside (i.e. $TOMCAT_HOME/webapps).</para> </listitem> <listitem> ! <para>Edit the file <filename>HTDOCS/wera/lib/config.inc</filename> (see below for details).</para> </listitem> *************** *** 334,339 **** <title>Settings</title> ! <para>Settings for WERA can be found in the file ! <apacheWebRootDir>/wera/lib/config.inc. Edit this file in order to configure WERA for your environment. Parameters to adapt:</para> --- 347,352 ---- <title>Settings</title> ! 
<para>Settings for WERA can be found in the file ! <filename>HTDOCS/wera/lib/config.inc</filename>. Edit this file in order to configure WERA for your environment. Parameters to adapt:</para> *************** *** 348,352 **** <entry>Change this so that it corresponds with your ! environment i.e. <apacheWebRootDir>/wera (you may of course rename the extracted wera directory to something else, and even choose to place it further down in the --- 361,365 ---- <entry>Change this so that it corresponds with your ! environment i.e. <filename>HTDOCS/wera</filename> (you may of course rename the extracted wera directory to something else, and even choose to place it further down in the *************** *** 371,378 **** <entry>$conf_aid_prefix = "/var/arcs/";</entry> ! <entry>The current version of the ArcRetriever needs to know where the ARC-files are located. All the ARC-files that you ! indexed with nucth should be placed in one directory. The ! path goes into this parameter.</entry> </row> --- 384,392 ---- <entry>$conf_aid_prefix = "/var/arcs/";</entry> ! <entry>The current version of the arcretriever needs to know where the ARC-files are located. All the ARC-files that you ! indexed with nutch should be placed in one directory. The ! path goes into this parameter (This configuration will be ! moved into arcretreiver).</entry> </row> *************** *** 386,394 **** <row> <entry>$document_retriever = ! "http://localhost:8080/ArcRetriever/ArcRetriever";</entry> <entry>Change the host name and port to point the tomcat installation of the host where your ARC-files ! recide.</entry> </row> --- 400,408 ---- <row> <entry>$document_retriever = ! "http://localhost:8080/arcretriever/arcretriever";</entry> <entry>Change the host name and port to point the tomcat installation of the host where your ARC-files ! reside.</entry> </row> *************** *** 401,405 **** (<hostname>:<port>). If you renamed the wera directory or unpacked it further down relative to ! 
ApacheWebRoot, update this parameter accordingly.</entry> </row> </tbody> --- 415,419 ---- (<hostname>:<port>). If you renamed the wera directory or unpacked it further down relative to ! HTDOCS, update this parameter accordingly.</entry> </row> </tbody> *************** *** 423,427 **** <orderedlist> <listitem> ! <para>Test that the ArcRetriever is functioning correctly</para> </listitem> --- 437,441 ---- <orderedlist> <listitem> ! <para>Test that the arcretriever is functioning correctly</para> </listitem> *************** *** 462,466 **** <para>An example of the result of the getmeta request ! http://localhost:8080/ArcRetriever/ArcRetriever?aid=5160509//home/wera/arcs/IAH-20041102080031-00007-utvikling1.nb.no.arc.gz&reqtype=getmeta is given below.</para> --- 476,480 ---- <para>An example of the result of the getmeta request ! http://localhost:8080/arcretriever/arcretriever?aid=5160509//home/wera/arcs/IAH-20041102080031-00007-utvikling1.nb.no.arc.gz&reqtype=getmeta is given below.</para> *************** *** 511,513 **** </section> </section> ! </article> \ No newline at end of file --- 525,527 ---- </section> </section> ! </article> |
From: Michael S. <sta...@us...> - 2005-10-06 22:32:55
|
Update of /cvsroot/archive-access/archive-access/projects/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6029 Modified Files: maven.xml project.properties project.xml Log Message: * maven.xml * project.properties * project.xml * src/articles/manual.xml * xdocs/navigation.xml * xdocs/requirements.xml Updated text around wera. Removed stuff like requirements. The manual has it all. Index: maven.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/maven.xml,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** maven.xml 5 Oct 2005 18:15:54 -0000 1.3 --- maven.xml 6 Oct 2005 22:32:45 -0000 1.4 *************** *** 14,22 **** <arg value="--archive" /> <arg value="--rsh=ssh" /> ! <arg ! value="${maven.build.dir}/docs/"/> <arg value="${maven.username}@archive-access.sf.net:/home/groups/a/ar/archive-access/htdocs/projects/wera/" /> </exec> </goal> <preGoal name="xdoc:jelly-transform"> <attainGoal name="faq" /> --- 14,22 ---- <arg value="--archive" /> <arg value="--rsh=ssh" /> ! <arg value="${maven.build.dir}/docs/"/> <arg value="${maven.username}@archive-access.sf.net:/home/groups/a/ar/archive-access/htdocs/projects/wera/" /> </exec> </goal> + <preGoal name="xdoc:jelly-transform"> <attainGoal name="faq" /> Index: project.properties =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/project.properties,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** project.properties 5 Oct 2005 18:15:54 -0000 1.3 --- project.properties 6 Oct 2005 22:32:45 -0000 1.4 *************** *** 32,34 **** --- 32,35 ---- # Properties for building the ArcRetriever WAR. 
+ maven.war.src = ${maven.src.dir}/webapps/arcretriever maven.war.final.name = arcretriever.war Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/project.xml,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** project.xml 5 Oct 2005 18:15:54 -0000 1.6 --- project.xml 6 Oct 2005 22:32:45 -0000 1.7 *************** *** 17,21 **** <organization> <name >NWA</name> ! <url >http://nwa.nb.no/</url> <logo>/images/nwa.jpg</logo> </organization> --- 17,21 ---- <organization> <name >NWA</name> ! <url >http://archive-access.sourceforge.net/projects/wera/</url> <logo>/images/nwa.jpg</logo> </organization> *************** *** 25,38 **** <package>no.nb.nwa</package> <logo /> ! <description>Wera is an archive <i>viewer</i> application that ! gives an Internet ! Archive Wayback Machine-like access to web archive collections. Wera is ! a php5 application based on -- and replacing -- the <a ! href="http://nwa.nb.no/">NwaToolset</a>. Currently, it uses ! <a href="/projects/nutch">Nutchwax</a> as its search engine ! core and the ARCRetriever (included) webapp fetching records from ARCs. </description> <!-- a short description of what the project does --> ! <shortDescription>An archive viewer application. </shortDescription> <!-- The project home page --> --- 25,46 ---- <package>no.nb.nwa</package> <logo /> ! <description>WERA (Web ARchive Access) is a freely available solution for ! searching and navigating archived web document collections. It works ! like the Internet Archive's <a ! href="http://www.archive.org/web">Wayback ! Machine</a> except it also allows for full-text search of the web ! archive. Wera is a php application based on pieces from -- and ! now, with <a href="/projects/nutch">Nutchwax</a> ! replaces -- <a href="http://nwa.nb.no/">NwaToolset</a>. ! The wera component includes an ARCRetriever webapp for the fetching of ! 
records from directories of Internet Archive ARC files. ! See the <a href="articles/manual">wera ! Manual</a> for more on how wera works, requirements, and installation. ! Wera development has been sponsored by the <a ! href="http://www.netpreserve.net">International Internet Preservation ! Consortium (IIPC)</a>. </description> <!-- a short description of what the project does --> ! <shortDescription>An Archive viewer application. </shortDescription> <!-- The project home page --> |
From: Michael S. <sta...@us...> - 2005-10-06 22:32:53
|
Update of /cvsroot/archive-access/archive-access/projects/wera/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv6029/xdocs Modified Files: navigation.xml Removed Files: requirements.xml Log Message: * maven.xml * project.properties * project.xml * src/articles/manual.xml * xdocs/navigation.xml * xdocs/requirements.xml Updated text around wera. Removed stuff like requirements. The manual has it all. Index: navigation.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/xdocs/navigation.xml,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** navigation.xml 5 Oct 2005 00:06:30 -0000 1.2 --- navigation.xml 6 Oct 2005 22:32:46 -0000 1.3 *************** *** 12,21 **** <menu name="Overview"> <item name="License" href="/license.html"/> - <item name="Requirements" href="/requirements.html"/> <item name="Downloads" href="downloads.html"/> <item name="Documentation" > ! <item name="Wera manual" href="/articles/manual.html"/> ! <item name="INSTALLATION-TODO" href="installation.html"/> <item name="FAQ" href="faq.html"/> </item> --- 12,19 ---- <menu name="Overview"> <item name="License" href="/license.html"/> <item name="Downloads" href="downloads.html"/> <item name="Documentation" > ! <item name="Wera Manual" href="/articles/manual.html"/> <item name="FAQ" href="faq.html"/> </item> --- requirements.xml DELETED --- |
From: Michael S. <sta...@us...> - 2005-10-06 21:23:29
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20474/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer Modified Files: IaIndexingFilter.java Log Message: Fix up wera references. Point to archive-access. Index: IaIndexingFilter.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** IaIndexingFilter.java 6 Oct 2005 02:31:10 -0000 1.20 --- IaIndexingFilter.java 6 Oct 2005 21:23:17 -0000 1.21 *************** *** 144,158 **** } if (mimetype != null) { ! // wera wants the sub and primary types. int index = mimetype.indexOf('/'); if (index > 0) { ! add(url, doc, "primarytype", mimetype.substring(0, index), ! true, true, true, false); if (index + 1 < mimetype.length()) { ! add(url, doc, "subtype", mimetype.substring(index + 1), ! true, true, true, false); } } - add(url, doc, "type", mimetype, true, false, true, false); } // Add as not lowercased, not stored, indexed, and not tokenized. --- 144,163 ---- } if (mimetype != null) { ! // wera wants the sub and primary types in index. So they are ! // stored but not searchable. nutch adds primary and subtypes ! // as well as complete type all to one 'type' field. ! final String type = "type"; ! add(url, doc, type, mimetype, true, false, true, false); int index = mimetype.indexOf('/'); if (index > 0) { ! String tmp = mimetype.substring(0, index); ! add(url, doc, "primaryType", tmp, true, true, false, false); ! add(url, doc, type, tmp, true, false, true, false); if (index + 1 < mimetype.length()) { ! tmp = mimetype.substring(index + 1); ! add(url, doc, "subType", tmp, true, true, false, false); ! 
add(url, doc, type, tmp, true, false, true, false); } } } // Add as not lowercased, not stored, indexed, and not tokenized. |
From: Michael S. <sta...@us...> - 2005-10-06 21:23:28
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20474/xdocs Modified Files: faq.fml gettingstarted.xml index.xml Log Message: Fix up wera references. Point to archive-access. Index: index.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/index.xml,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** index.xml 16 Aug 2005 03:28:44 -0000 1.11 --- index.xml 6 Oct 2005 21:23:17 -0000 1.12 *************** *** 25,36 **** <subsection name="Initial alpha release 0.2.1 07/27/2005"> <p>Announcing the initial coordinated alpha release of NutchWAX and ! <a href="http://nwa.nb.no/wera/">WERA</a>. WERA is an archive viewer application that gives an Internet Archive <a href="http://www.archive.org/web/web.php">Wayback Machine</a>-like ! access to web archive collections. WERA is ! part of the <a href="http://nwa.nb.no/">NWA Toolset</a> and is ! available at the NWA site (See ! <a href="gettingstarted.html">Getting started...</a> for download ! and install instructions). There are no release notes accompanying these releases. Rather, see the <a href="https://sourceforge.net/tracker/?group_id=118427&atid=681137">RFE</a> and <a href="https://sourceforge.net/tracker/?group_id=118427&atid=681140">Bug</a> --- 25,32 ---- <subsection name="Initial alpha release 0.2.1 07/27/2005"> <p>Announcing the initial coordinated alpha release of NutchWAX and ! <a href="/projects/wera/">wera</a>. Wera is an archive viewer application that gives an Internet Archive <a href="http://www.archive.org/web/web.php">Wayback Machine</a>-like ! access to web archive collections. There are no release notes accompanying these releases. 
Rather, see the <a href="https://sourceforge.net/tracker/?group_id=118427&atid=681137">RFE</a> and <a href="https://sourceforge.net/tracker/?group_id=118427&atid=681140">Bug</a> *************** *** 38,47 **** currently outstanding. </p> - <p>Checkout <a href="http://wbsearch.archive.org/wera">wera-demo</a> - (and the - <a href="http://wbsearch02.archive.org:8080/nutchwax">nutchwax-demo</a> - instance its using) for a sometimes demo going against an index of a - million pages made of 3 crawls of of the May 2005 British National - Election.</p> </subsection> </section> --- 34,37 ---- Index: faq.fml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/faq.fml,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** faq.fml 6 Oct 2005 19:14:47 -0000 1.9 --- faq.fml 6 Oct 2005 21:23:17 -0000 1.10 *************** *** 11,15 **** <a href="http://www.archive.org/web/web.php">Wayback Machine</a> or with the freely available ! <a href="http://nwa.nb.no/wera/">WERA</a> application, you have a complete access tool for SMALL web archive collections (There are known issues running against large collections). --- 11,15 ---- <a href="http://www.archive.org/web/web.php">Wayback Machine</a> or with the freely available ! <a href="/projects/wera/">wera</a> application, you have a complete access tool for SMALL web archive collections (There are known issues running against large collections). Index: gettingstarted.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/gettingstarted.xml,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** gettingstarted.xml 1 Sep 2005 20:58:24 -0000 1.9 --- gettingstarted.xml 6 Oct 2005 21:23:17 -0000 1.10 *************** *** 47,67 **** </subsection> ! 
<subsection name="WERA"> <p>You'll notice that clicking on a NutchWAX search result takes you nowhere. This is because NutchWAX by default has no means of rendoring the search hit out of the web archive collection. ! <a href="http://nwa.nb.no/wera/">WERA</a> is an application that can do this for you. WERA is like the Internet Archive <a href="http://www.archive.org/web/web.php">Wayback Machine</a> except it also supports full text search (courtesy of NutchWAX). It is a php ! application for searching and browsing WACs that is part of ! the <a href="http://nwa.nb.no/">NWA Toolset</a>. ! See the <a href="http://nwa.nb.no/wera/">WERA</a> for how to install. ! It comes with a nice java-based installer that will run whether or ! not X is installed. Be aware that your php install must include the xml module -- check phpinfo output -- and that configuration is kept ! in <code>wera/lib/config.inc</code> (There is also a hardcoding of ! path to arcs in <code>wera/lib/seal/nutch.inc</code> that you'll ! have to manually edit). </p> </subsection> --- 47,63 ---- </subsection> ! <subsection name="wera"> <p>You'll notice that clicking on a NutchWAX search result takes you nowhere. This is because NutchWAX by default has no means of rendoring the search hit out of the web archive collection. ! <a href="/projects/wera/">WERA</a> is an application that can do this for you. WERA is like the Internet Archive <a href="http://www.archive.org/web/web.php">Wayback Machine</a> except it also supports full text search (courtesy of NutchWAX). It is a php ! application for searching and browsing WACs. ! See the <a href="/projects/wera/">wera</a> for how to install. ! Be aware that your php install must include the xml module -- check phpinfo output -- and that configuration is kept ! in <code>wera/lib/config.inc</code>. </p> </subsection> |
From: Michael S. <sta...@us...> - 2005-10-06 21:23:26
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/query-ia/src/java/org/archive/access/nutch/searcher In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20474/src/plugin/query-ia/src/java/org/archive/access/nutch/searcher Modified Files: IaTypeQueryFilter.java Log Message: Fix up wera references. Point to archive-access. Index: IaTypeQueryFilter.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/query-ia/src/java/org/archive/access/nutch/searcher/IaTypeQueryFilter.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** IaTypeQueryFilter.java 29 Jun 2005 13:46:48 -0000 1.1 --- IaTypeQueryFilter.java 6 Oct 2005 21:23:17 -0000 1.2 *************** *** 9,13 **** public class IaTypeQueryFilter extends RawFieldQueryFilter { public IaTypeQueryFilter() { ! super("type"); } } --- 9,13 ---- public class IaTypeQueryFilter extends RawFieldQueryFilter { public IaTypeQueryFilter() { ! super("type", true, 0.1f); } } |
From: Michael S. <sta...@us...> - 2005-10-06 19:28:50
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/conf In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22427/conf Modified Files: nutch-site.xml.all Log Message: * conf/nutch-site.xml.all Enable indexing of all in the default nutchwax config (.all). Index: nutch-site.xml.all =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/conf/nutch-site.xml.all,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** nutch-site.xml.all 6 Oct 2005 01:07:31 -0000 1.2 --- nutch-site.xml.all 6 Oct 2005 19:28:40 -0000 1.3 *************** *** 1,5 **** <?xml version="1.0"?> ! <!-- Internet Archive Nutch configuration --> <nutch-conf> --- 1,7 ---- <?xml version="1.0"?> ! <!--Internet Archive Nutch configuration. ! This config. is what gets built into nutchwax. ! --> <nutch-conf> *************** *** 8,12 **** ! <!-- enable parse-ext --> <property> <name>plugin.includes</name> --- 10,16 ---- ! <!-- Enable parse-ext (parse-ext is a parser that calls the 'ext'ernal program ! xpdf to parse pdf files. Also enable parse-default and the ia plugins. ! --> <property> <name>plugin.includes</name> *************** *** 50,53 **** --- 54,59 ---- </property> + + <!-- For lucene indexes, normally. The default is 128. Write every 1024 entries rather than every 128, the default. *************** *** 78,81 **** --- 84,88 ---- </property> + <!-- make summaries a little longer than the default --> <property> *************** *** 96,110 **** </property> ! <!-- the name of the archive server hosting this archive --> <property> ! <name>archive.host</name> ! <value>crawls.archive.org</value> </property> ! <!-- the name of this archive collection --> <property> <name>archive.collection</name> <value>be05</value> </property> <!--Optionally, hardcode the nutch datadir location rather --- 103,122 ---- </property> ! <!-- the name of the server hosting collections.--> <property> ! 
<name>collections.host</name> ! <value>collections.example.org</value> </property> ! <!-- The name of this archive collection. ! DEPRECATED. Now search.jsp uses the 'collection' returned by the search ! result drawing up the wayback URL and at index time, use the ! command-line 'collection' option. ! <property> <name>archive.collection</name> <value>be05</value> </property> + --> <!--Optionally, hardcode the nutch datadir location rather *************** *** 121,125 **** <property> <name>archive.index.all</name> ! <value>false</value> </property> </nutch-conf> --- 133,137 ---- <property> <name>archive.index.all</name> ! <value>true</value> </property> </nutch-conf> |
From: Sverre B. <sv...@us...> - 2005-10-06 19:19:13
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20407 Added Files: metadata.php Log Message: new file --- NEW FILE: metadata.php --- <?php header("Content-Type: text/html; charset=UTF-8"); include_once("lib/config.inc"); include($conf_includepath . "/header.inc"); ?> </HEAD> <BODY><center> <font face="helvetica,arial,sans-serif"> <table align="center" class="resultsborder" border="0" cellspacing="0" cellpadding="0" width="90%"> <tr> <td> <?php include_once($conf_index_file); include_once("$conf_searchenginepath/indexUtils.inc"); include ("$conf_includepath/meta.inc"); include ("$conf_includepath/documentLocator.inc"); $url = $_REQUEST['url']; $time = $_REQUEST['time']; $aid = $_REQUEST['aid']; $urlnotfound = false; if (!isset($aid)) { $searchEngine = new $conf_index_class(); $locator = new documentLocator(); if (!isset($timestamp)) { $locator->initialize($searchEngine, $url, false, $timestamp, 'LAST'); } else { $locator->initialize($searchEngine, $url, false, $timestamp, 'NEAR'); } $numhits = $locator->findVersions(); if($numhits <= 0) { $urlnotfound = true; } $result = $locator->getResultSet(); $document = $result[1]; $aid = $document['archiveidentifier']; } if ($urlnotfound) { print "Sorry, the url " . $url . " was not found in the <a href=" . $locator->getQueryUrl() . 
">index</a>"; } else { $names = array ( "aid" => "Archive Identifier (aid)", "url" => "Url", "archival_time" => "Time of archival", "last_modified_time" => "Last modified time", "type" => "Mime-type", "charset" => "Character Encoding", "filestatus" => "File Status", "content_checksum" => "Content Checksum", "http-header" => "HTTP Header" ); $metaParser = new metaParser($aid); if ($metaParser->doParseMeta()) { $metadata = $metaParser->getMetadata(); print "<h1>Metadata</h1>\n"; print "<table class=\"resultsborder\">"; foreach ($metadata as $k => $v) { echo "<tr><td><b>"; echo $names[$k]; print "</b></td></tr><tr><td> $v</td></tr>"; } print "</table>"; } else { print $metaParser->getErrorMessage(); } } ?> </td> </tr> </table> </center> <?php include($conf_includepath . "/footer.inc"); ?> |
From: Sverre B. <sv...@us...> - 2005-10-06 19:18:51
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv20327/lib/seal Modified Files: nutch.inc Log Message: Index: nutch.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal/nutch.inc,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** nutch.inc 6 Oct 2005 17:41:14 -0000 1.4 --- nutch.inc 6 Oct 2005 19:18:42 -0000 1.5 *************** *** 294,298 **** case "NUTCH:ENCODING": if (in_array("encoding", $this->resultfields)) { ! $this->mime[$this->hitno]['encoding'] .= $data; } break; --- 294,298 ---- case "NUTCH:ENCODING": if (in_array("encoding", $this->resultfields)) { ! $this->resultset[$this->hitno]['encoding'] .= $data; } break; |
From: Michael S. <sta...@us...> - 2005-10-06 19:14:59
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19265/xdocs Modified Files: faq.fml Log Message: * xdocs/faq.fml Note on querying for mimetypes. Index: faq.fml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/faq.fml,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** faq.fml 6 Oct 2005 17:38:25 -0000 1.8 --- faq.fml 6 Oct 2005 19:14:47 -0000 1.9 *************** *** 201,206 **** </faq> <faq id="datesort"> ! <question>How to sort by date? </question> --- 201,227 ---- </faq> + <faq id="encoding"> + <question>Why is encoding of non-ascii characters all messed up? + </question> + <answer> + <p>See <i>useBodyEncodingForURI</i> in the <a + href="http://jakarta.apache.org/tomcat/tomcat-5.5-doc/config/ajp.html">Tomcat Configuration Reference</a>. Edit <code>$TOMCAT_HOME/conf/server.xml</code> + and add <i>useBodyEncodingForURI=true</i>. Here is what it looks like + when edit has been added: + <pre><!-- Define a non-SSL HTTP/1.1 Connector on port 8080 --> + <Connector port="8080" maxHttpHeaderSize="8192" + maxThreads="150" minSpareThreads="25" maxSpareThreads="75" + enableLookups="false" redirectPort="8443" acceptCount="100" + connectionTimeout="20000" disableUploadTimeout="true" + useBodyEncodingForURI="true" + /></pre> + </p> + </answer> + </faq> + </part> + <part id="querying"> + <title>Querying</title> <faq id="datesort"> ! <question>How to sort results by date? </question> *************** *** 218,229 **** </p></answer> </faq> ! <faq id="encoding"> ! <question>Why is encoding of non-ascii characters all messed up? ! </question> ! <answer> ! <p>See <i>useBodyEncodingForURI</i> in the <a ! href="http://jakarta.apache.org/tomcat/tomcat-5.5-doc/config/ajp.html">Tomcat Configuration Reference</a>.</p> ! </answer> ! </faq> ! </part> </faqs> --- 239,254 ---- </p></answer> </faq> ! <faq> ! 
<question>How to query for mimetypes? ! </question> ! <answer> ! <p>Use <i>type</i> query field name. Nutchwax -- like nutch -- adds the ! mimetype, the primary type and subtype to a <i>type</i> field. This ! means that you can query for the mimetypes 'text/html' by querying ! <code>type:text/html</code>, or for primary type 'text' by ! querying <code>type:text</code>, or for subtype 'html' by querying ! <code>type:html</code>, etc.</p> ! </answer> ! </faq> ! </part> </faqs> |
From: Michael S. <sta...@us...> - 2005-10-06 18:19:51
|
Update of /cvsroot/archive-access/archive-access/projects/wera/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3549/xdocs Modified Files: faq.fml Log Message: * xdocs/faq.fml Note on encoding. Index: faq.fml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/xdocs/faq.fml,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** faq.fml 4 Oct 2005 22:59:28 -0000 1.1 --- faq.fml 6 Oct 2005 18:19:43 -0000 1.2 *************** *** 2,11 **** <faqs title="Frequently Asked Questions"> <part id="general"> ! <faq id="about"> ! <question> ! TODO </question> <answer> ! <p>TODO </p> </answer> </faq> --- 2,13 ---- <faqs title="Frequently Asked Questions"> <part id="general"> ! <faq id="encoding"> ! <question>Why is encoding of non-ascii characters all messed up? </question> <answer> ! <p>See <i>useBodyEncodingForURI</i> in the <a ! href="http://jakarta.apache.org/tomcat/tomcat-5.5-doc/config/ajp.html">Tomcat Configuration Reference</a>. See ! <a href="http://archive-access.sourceforge.net/projects/nutch/faq.html#encoding">Nutchwax FAQ</a> for more detail. ! </p> </answer> </faq> |
From: Sverre B. <sv...@us...> - 2005-10-06 17:41:23
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26051/lib/seal Modified Files: nutch.inc Log Message: http://sourceforge.net/tracker/index.php?func=detail&aid=1314403&group_id=118427&atid=681137 Index: nutch.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal/nutch.inc,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** nutch.inc 5 Oct 2005 22:42:47 -0000 1.3 --- nutch.inc 6 Oct 2005 17:41:14 -0000 1.4 *************** *** 291,295 **** $this->mime[$this->hitno]['sub'] .= $data; } ! break; } } --- 291,300 ---- $this->mime[$this->hitno]['sub'] .= $data; } ! break; ! case "NUTCH:ENCODING": ! if (in_array("encoding", $this->resultfields)) { ! $this->mime[$this->hitno]['encoding'] .= $data; ! } ! break; } } |
From: Michael S. <sta...@us...> - 2005-10-06 17:38:33
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25138/xdocs Modified Files: faq.fml Log Message: * xdocs/faq.fml Note on encoding. Index: faq.fml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/faq.fml,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** faq.fml 9 Sep 2005 23:43:10 -0000 1.7 --- faq.fml 6 Oct 2005 17:38:25 -0000 1.8 *************** *** 218,221 **** --- 218,229 ---- </p></answer> </faq> + <faq id="encoding"> + <question>Why is encoding of non-ascii characters all messed up? + </question> + <answer> + <p>See <i>useBodyEncodingForURI</i> in the <a + href="http://jakarta.apache.org/tomcat/tomcat-5.5-doc/config/ajp.html">Tomcat Configuration Reference</a>.</p> + </answer> + </faq> </part> </faqs> |
From: Michael S. <sta...@us...> - 2005-10-06 17:35:15
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24686/src/web Modified Files: search.jsp Log Message: * src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java * src/web/search.jsp Set config in tomcat rather than here. See faq in wera on encodings. Index: search.jsp =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/web/search.jsp,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** search.jsp 6 Oct 2005 01:07:32 -0000 1.22 --- search.jsp 6 Oct 2005 17:35:02 -0000 1.23 *************** *** 36,43 **** queryString = ""; } - // From Oskar and Lukas. Shouldn't be needed but looks like it is. - // Would be consistent with this advice: - // http://www.jguru.com/faq/view.jsp?EID=391295 - queryString = new String(queryString.getBytes("ISO-8859-1"), "UTF-8"); String htmlQueryString = Entities.encode(queryString); --- 36,39 ---- |
From: Michael S. <sta...@us...> - 2005-10-06 17:35:13
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24686/src/java/org/archive/access/nutch Modified Files: NutchwaxOpenSearchServlet.java Log Message: * src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java * src/web/search.jsp Set config in tomcat rather than here. See faq in wera on encodings. Index: NutchwaxOpenSearchServlet.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** NutchwaxOpenSearchServlet.java 6 Oct 2005 01:45:35 -0000 1.2 --- NutchwaxOpenSearchServlet.java 6 Oct 2005 17:35:02 -0000 1.3 *************** *** 50,67 **** // Make a delegating method that preprocesses the query string // converting any exacturl values so they'll pass the NutchAnalysis. - // Also make it so we encode the parameter strings properly. HttpServletRequest delegatingReq = new HttpServletRequest() { public String getParameter(String parameter) { String q = req.getParameter(parameter); ! if (parameter == null || !parameter.equals("query") || ! q == null) { ! return q; ! } ! try { ! q = new String(q.getBytes("ISO-8859-1"), "UTF-8"); ! } catch (java.io.UnsupportedEncodingException e) { ! throw new RuntimeException(e); ! } ! return NutchwaxQuery.encodeExacturl(q); } --- 50,58 ---- // Make a delegating method that preprocesses the query string // converting any exacturl values so they'll pass the NutchAnalysis. HttpServletRequest delegatingReq = new HttpServletRequest() { public String getParameter(String parameter) { String q = req.getParameter(parameter); ! return (parameter != null && parameter.equals("query"))? ! NutchwaxQuery.encodeExacturl(q): q; } |
From: Michael S. <sta...@us...> - 2005-10-06 02:31:18
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17728/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer Modified Files: IaIndexingFilter.java Log Message: * src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java Add 'encoding' to the result. Index: IaIndexingFilter.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/plugin/index-ia/src/java/org/archive/access/nutch/indexer/IaIndexingFilter.java,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** IaIndexingFilter.java 20 Aug 2005 00:09:37 -0000 1.19 --- IaIndexingFilter.java 6 Oct 2005 02:31:10 -0000 1.20 *************** *** 60,63 **** --- 60,69 ---- public static final String EXACTURL_KEY = "exacturl"; + + /** + * Set into metadata by the nutch html parser. + */ + private static final String ENCODING_KEY = "CharEncodingForConversion"; + private MessageDigest md = null; *************** *** 106,109 **** --- 112,118 ---- LOGGER.info("No metadata for " + doc.toString()); } else { + // Add as stored, unindexed, and untokenized. + add(url, doc, "encoding", p.getProperty(ENCODING_KEY), + false, true, true, false); // Add as stored, indexed, and untokenized. add(url, doc, ARCCOLLECTION_KEY, p.getProperty(ARCCOLLECTION_KEY), *************** *** 139,146 **** if (index > 0) { add(url, doc, "primarytype", mimetype.substring(0, index), ! true, true, false, false); if (index + 1 < mimetype.length()) { add(url, doc, "subtype", mimetype.substring(index + 1), ! true, true, false, false); } } --- 148,155 ---- if (index > 0) { add(url, doc, "primarytype", mimetype.substring(0, index), ! true, true, true, false); if (index + 1 < mimetype.length()) { add(url, doc, "subtype", mimetype.substring(index + 1), ! true, true, true, false); } } |
From: Sverre B. <sv...@us...> - 2005-10-06 02:07:47
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13143/lib Modified Files: config.inc.template config.inc Log Message: Now possible to turn off displaying number of versions per hit (expensive) Index: config.inc.template =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/config.inc.template,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** config.inc.template 4 Oct 2005 22:59:27 -0000 1.1 --- config.inc.template 6 Oct 2005 02:07:37 -0000 1.2 *************** *** 61,67 **** $conf_document_retriever = "$document_retriever?reqtype=getfile&aid="; ! // URL of gui installation $conf_http_host = "@guiUrl@"; // Logo $conf_logo ="$conf_http_host/images/wera.png"; --- 61,77 ---- $conf_document_retriever = "$document_retriever?reqtype=getfile&aid="; ! // URL of ui installation $conf_http_host = "@guiUrl@"; + // Set to true if you want number of versions + // to show up in each single search result (expensive on NutchWax) + // otherwise set to false + $conf_show_num_verions = true; + // Set to true if you want number of versions matching + // query to show up in each single search result, + // otherwise set to false + // Only kicks in if $conf_show_num_verions is true + $conf_show_num_verions_matching_query = true; + // Logo $conf_logo ="$conf_http_host/images/wera.png"; Index: config.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/config.inc,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** config.inc 5 Oct 2005 22:42:47 -0000 1.4 --- config.inc 6 Oct 2005 02:07:37 -0000 1.5 *************** *** 43,48 **** // What search engine to use $conf_searchengine = "nutch"; ! 
#$conf_searchengine_url = "http://wbsearch04.archive.org:8080/nutchwax/opensearch"; ! $conf_searchengine_url = "http://localhost:8082/nutchwax/opensearch"; $conf_index_file = $conf_searchenginepath . "/" . $conf_searchengine . ".inc"; $conf_index_class = $conf_searchengine . "Search"; --- 43,48 ---- // What search engine to use $conf_searchengine = "nutch"; ! $conf_searchengine_url = "http://wbsearch04.archive.org:8080/nutchwax/opensearch"; ! #$conf_searchengine_url = "http://localhost:8082/nutchwax/opensearch"; $conf_index_file = $conf_searchenginepath . "/" . $conf_searchengine . ".inc"; $conf_index_class = $conf_searchengine . "Search"; *************** *** 54,68 **** // // TODO : Move this into the ARC Retriever ! #$conf_aid_prefix = "/2/katrina/nutch-data/arcs/"; ! $conf_aid_prefix = "/home/sverreb/apps/heritrix-1.4.0/jobs/lux2-20051004171719798/arcs/"; $conf_aid_suffix = ".arc.gz"; // Prefix to document retriever ! #$document_retriever = "http://wbsearch04.archive.org:8080/ArcRetriever/ArcRetriever"; ! $document_retriever = "http://localhost:8082/ArcRetriever/ArcRetriever"; $conf_document_retriever = "$document_retriever?reqtype=getfile&aid="; ! // URL of gui installation $conf_http_host = "http://localhost/aaWera"; // Logo --- 54,78 ---- // // TODO : Move this into the ARC Retriever ! $conf_aid_prefix = "/2/katrina/nutch-data/arcs/"; ! #$conf_aid_prefix = "/home/sverreb/apps/heritrix-1.4.0/jobs/lux2-20051004171719798/arcs/"; $conf_aid_suffix = ".arc.gz"; // Prefix to document retriever ! $document_retriever = "http://wbsearch04.archive.org:8080/ArcRetriever/ArcRetriever"; ! #$document_retriever = "http://localhost:8082/ArcRetriever/ArcRetriever"; $conf_document_retriever = "$document_retriever?reqtype=getfile&aid="; ! 
// URL of ui installation $conf_http_host = "http://localhost/aaWera"; + // Set to true if you want number of versions + // to show up in each single search result (expensive on NutchWax) + // otherwise set to false + $conf_show_num_verions = true; + // Set to true if you want number of versions matching + // query to show up in each single search result, + // otherwise set to false + // Only kicks in if $conf_show_num_verions is true + $conf_show_num_verions_matching_query = true; + // Logo |
From: Sverre B. <sv...@us...> - 2005-10-06 02:07:45
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13143 Modified Files: index.php Log Message: Now possible to turn off displaying number of versions per hit (expensive) Index: index.php =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/index.php,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** index.php 5 Oct 2005 01:38:18 -0000 1.3 --- index.php 6 Oct 2005 02:07:37 -0000 1.4 *************** *** 123,137 **** <table border='0' cellpadding='0' cellspacing='0' width=90%> <tr> ! <td class="norm" colspan="5" align="left"><img alt='' height='8' src='/images/1px.gif' width='1'></td> </tr> <tr> ! <td colspan='5' class='border'><img alt='' height='2' src='/images/1px.gif' width='1'></td> </tr> <tr> ! <td colspan='5'><img src='/images/1px.gif' width='1' height='5' alt=''></td> </tr> <tr> <td class="shade" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> - <td class="shade" width="5%"><?php print(nls("Match"));?>:</td> <td class="shade"><?php print(nls("Query:"));?></td> <td class="shade" align="right"><a href="<?php print($conf_helplinks['search']['file']);?>"> --- 123,136 ---- <table border='0' cellpadding='0' cellspacing='0' width=90%> <tr> ! <td class="norm" colspan="4" align="left"><img alt='' height='8' src='/images/1px.gif' width='1'></td> </tr> <tr> ! <td colspan='4' class='border'><img alt='' height='2' src='/images/1px.gif' width='1'></td> </tr> <tr> ! 
<td colspan='4'><img src='/images/1px.gif' width='1' height='5' alt=''></td> </tr> <tr> <td class="shade" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> <td class="shade"><?php print(nls("Query:"));?></td> <td class="shade" align="right"><a href="<?php print($conf_helplinks['search']['file']);?>"> *************** *** 141,160 **** <tr> <td class="shade" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> - <td class="shade"><form name='search' action=<? echo $_SERVER['PHP_SELF']; ?> method='get'> <?php $query = trim(stripslashes($query)); ! if ($querytype == "phrase") { ! $query = str_replace('"', '', $query); ! } ?> ! <select name='querytype'> ! <option value=all <?php if ($querytype=="all") print "selected"?>><?php print(nls("All words"));?> ! <option value=any <?php if ($querytype=="any") print "selected"?>><?php print(nls("Any word"));?> ! <option value=phrase <?php if ($querytype=="phrase") print "selected"?>><?php print(nls("Exact phrase"));?> ! </select> ! </td> <td colspan="3"> <input type='text' name='query' value='<?php print $query; ?>' class="searchtext" size="50"/> <input type='submit' value='<?php print(nls("Search"));?>' class="searchbutton" onClick="submitForm(0);"/> --- 140,152 ---- <tr> <td class="shade" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> <?php $query = trim(stripslashes($query)); ! ?> ! <td colspan="3"> + <form name='search' action=<? echo $_SERVER['PHP_SELF']; ?> method='get'> <input type='text' name='query' value='<?php print $query; ?>' class="searchtext" size="50"/> <input type='submit' value='<?php print(nls("Search"));?>' class="searchbutton" onClick="submitForm(0);"/> *************** *** 162,170 **** </tr> <tr> ! <td class="shade" height="30" colspan="2"> </td> <td colspan="3" class="shade" valign="bottom"><?php print(nls("Year"));?> <?php print(nls("(from - to)"))?></td> </tr> <tr> ! 
<td class="shade" height="30" colspan="2"> </td> <td colspan="3"> <input name='year_from' size=4 maxlength="4" type="text" value='<?php print $year_from;?>'/> - --- 154,162 ---- </tr> <tr> ! <td class="shade" height="30"> </td> <td colspan="3" class="shade" valign="bottom"><?php print(nls("Year"));?> <?php print(nls("(from - to)"))?></td> </tr> <tr> ! <td class="shade" height="30"> </td> <td colspan="3"> <input name='year_from' size=4 maxlength="4" type="text" value='<?php print $year_from;?>'/> - *************** *** 256,284 **** print "(".$value['url'].")<br>"; print "(".$value['description'].")<br>"; - $search2 = new $conf_index_class (); - $vquery = $query . " exacturl:" . urlencode($value["url"]); - $search2->setQuery($vquery); - $search2->unsetSupressDuplicates(); - $search2->setSortorder("descending"); - $search2->setSizeOfResultSet(1); - $search2->setOffset(0); - $search2->setFieldsInResult(date); - if ($search2->doQuery()) { - $versions = $search2->getResultSet(); - $numversions = $search2->getNumHitsTotal(); - } - else { - $numversions = "<b>? <a href=\"" . $search2->queryurl . "\">" . $search2->getErrorMessage() . "</a></b>"; - } - $search2->setQuery("exacturl:" . urlencode($value["url"])); - if ($search2->doQuery()) { - $totalversions = $search2->getNumHitsTotal(); - } - else { - $totalversions = "<b>? <a href=\"" . $search2->queryurl . "\">" . $search2->getErrorMessage() . "</a></b>"; - } ! print nls("Number of versions satisfying query")." / ".nls("total number of versions")." : "; ! print $numversions."/".$totalversions."<br>"; $linkstring = "<a href=\"result.php?time=".$versions[1]['date']."&url=".index_encode($value["url"])."\">".nls("Timeline")."</a>"; $overview = "<a href=\"overview.php?url=".index_encode($value["url"])."\" >".nls("Overview")."</a>"; --- 248,292 ---- print "(".$value['url'].")<br>"; print "(".$value['description'].")<br>"; ! if ($conf_show_num_verions) { ! $search2 = new $conf_index_class (); ! 
$search2->unsetSupressDuplicates(); ! $search2->setSortorder("descending"); ! $search2->setSizeOfResultSet(1); ! $search2->setOffset(0); ! $search2->setFieldsInResult(date); ! $numversions_text1 = ""; ! $numversions_text2 = ""; ! ! if ($conf_show_num_verions_matching_query) { ! $vquery = $querystring . " exacturl:" . urlencode($value["url"]); ! $search2->setQuery($vquery); ! ! if ($search2->doQuery()) { ! $versions = $search2->getResultSet(); ! $numversions = $search2->getNumHitsTotal(); ! } ! else { ! $numversions = "<b>? <a href=\"" . $search2->queryurl . "\">" . $search2->getErrorMessage() . "</a></b>"; ! } ! $numversions_text1 = "(" . nls("matching query")."/". nls(total) . ")"; ! $numversions_text2 = $numversions."/"; ! } ! ! ! $search2->setQuery("exacturl:" . urlencode($value["url"])); ! if ($search2->doQuery()) { ! $totalversions = $search2->getNumHitsTotal(); ! } ! else { ! $totalversions = "<b>? <a href=\"" . $search2->queryurl . "\">" . $search2->getErrorMessage() . "</a></b>"; ! } ! ! print nls("Versions") . " "; ! print $numversions_text1 . " "; ! print $numversions_text2 . $totalversions."<br>"; ! } ! ! $linkstring = "<a href=\"result.php?time=".$versions[1]['date']."&url=".index_encode($value["url"])."\">".nls("Timeline")."</a>"; $overview = "<a href=\"overview.php?url=".index_encode($value["url"])."\" >".nls("Overview")."</a>"; |