From: Michael S. <sta...@us...> - 2005-10-04 22:59:42
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23369/wera/src/webapps/wera/lib/seal

Added Files:
    fast.inc indexSearch.inc indexUtils.inc nutch.inc
Log Message:
First time add of wera. Moved here from nwa.nb.no.

--- NEW FILE: indexSearch.inc ---
<?php
/*
 * This file is part of WERA.
 *
 * Copyright (C) 2001-2002 Royal Library in Stockholm,
 *                         Royal Library in Copenhagen,
 *                         Helsinki University Library of Finland,
 *                         National Library of Norway,
 *                         National and University Library of Iceland.
 *
 * WERA is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * WERA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WERA; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * The IndexSearch class defines methods
 * for interfacing "any" search engine
 *
 * To interface your own search engine
 * use this class as the superclass.
 *
 * $Id: indexSearch.inc,v 1.1 2005/10/04 22:59:28 stack-sf Exp $
 */
class indexSearch {

    var $query;
    var $numhits;
    var $numhitstotal;
    var $hitsperset;
    var $offset;
    var $timespent;
    var $resultfields = array();
    var $resultset = array();
    var $sortorder;
    var $errormsg;

    /**
     * Set the result set size
     *
     * Sets the number of hits returned by the
     * getResultSet and getXmlResultSet methods
     *
     * @param integer Number of hits to return
     */
    function setSizeOfResultSet($hits) {
        $this->hitsperset = $hits;
    }

    /**
     * Set the offset
     *
     * Sets the offset for the result set. An offset of 10
     * and a result set size of 10 will give a result
     * set with hits 11-20 (assuming the hitnumbers
     * start with 1)
     *
     * @param integer Offset
     */
    function setOffset($offset) {
        $this->offset = $offset;
    }

    /**
     * Set the sort order
     *
     * @param string Sort order
     */
    function setSortOrder($sortorder) {
        $this->sortorder = $sortorder;
    }

    /**
     * Get the query response time
     *
     * @return float Time spent on query
     */
    function getTimeSpent() {
        return $this->timespent;
    }

    /**
     * Get number of hits in resultset
     *
     * @return number of hits
     */
    function getNumHits() {
        return $this->numhits;
    }

    /**
     * Get the total number of hits
     *
     * @return number of hits
     */
    function getNumHitsTotal() {
        return $this->numhitstotal;
    }

    /**
     * Set query string for search
     *
     * The query string must conform to the syntax
     * outlined below. The query string may be in
     * upper, lower or mixed case.
     *
     * Searching text indices:
     * schema.index:"word"    - Word match
     * schema.index:"word*"   - Right hand truncation, match for part of word
     * schema.index:"phrase"  - Phrase match
     *
     * Searching integer indices:
     * schema.index:i         - Exact match
     * schema.index:[i;j]     - Range (including boundaries)
     * schema.index:[i;]      - Larger than or equal to
     * schema.index:[;i]      - Smaller than or equal to
     * schema.index:>i        - Larger than
     * schema.index:<i        - Smaller than
     *
     * In the context of the NWA Access Module schema will be NWA.
     * The index names will be defined by the NWA document format
     * (http://nwa.nb.no/nwa/export/1.0/).
     *
     * Boolean operators:
     * The NWA query language supports the boolean operators: AND, OR, ANDNOT
     *
     * Parenthesis:
     * The left parenthesis "(" and right parenthesis ")" are used
     * to delineate one expression from another. For example, in the query
     *
     * nwa.dcdate:998690406 AND (nwa.title:"ouagadougou"
     * OR nwa.title:"capitol of burkina fazo"),
     *
     * parentheses group the ORs together so they are as a distinct
     * entity from the AND.
     *
     * @param string Query string
     */
    function setQuery($querystring) {
        $this->query = $querystring;
    }

    /**
     * Sets which fields to include in the result
     * set returned by the getResultSet and
     * getXmlResultSet methods.
     *
     * $fields : a comma, or whitespace limited
     *           list of result fields.
     *
     * The base implementation does nothing; subclasses override it.
     */
    function setFieldsInResult($fields) {
    }

    /**
     * Executes the query.
     *
     * This method will populate the result set array
     * Returns an array of the following form:
     * Array([0]=>Array([dcdate]=>"2001-10-14"[dctitle]=>"Some title")
     *       [1]=>Array([dcdate]=>"2001-10-15"[dctitle]=>"Another title"))
     * the fields returned in the array is determined by the
     * setFieldsInResult method (e.g. setFieldsInResult("dcdate, dctitle")
     *
     * The base implementation does nothing and reports success.
     *
     * @return boolean False if error
     */
    function doQuery() {
        return true;
    }

    /**
     * Get the result set
     *
     * @return array Result set
     */
    function getResultSet() {
        return $this->resultset;
    }

    /**
     * If the doQuery method returned false
     * use this to fetch the error message string
     *
     * @return string Error message
     */
    function getErrorMessage() {
        return $this->errormsg;
    }

    /**
     * Escape a value for use in XML attribute or element content.
     *
     * Done byte-wise with str_replace so the result does not depend on
     * the character encoding of the value.
     *
     * @param  mixed  Value to escape
     * @return string Escaped value
     */
    function xmlEscape($value) {
        // '&' must be replaced first so the entities added afterwards survive.
        return str_replace(
            array('&', '<', '>', '"', "'"),
            array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;'),
            (string) $value);
    }

    /**
     * Get the result set
     *
     * The method will return an xml formatted result of the following form:
     *
     * <?xml version="1.0" encoding="utf-8"?>
     * <resultset query="query" sortorder="descending" fields="dcdate, dctitle" hits=10 totalhits="112" spanstart="1" spanend="10" timespent="0.0017489194869995">
     *   <doc id="1">
     *     <dcdate></dcdate>
     *     <dctitle></dctitle>
     *   </doc>
     *   <doc id="2">
     *     <dcdate></dcdate>
     *     <dctitle></dctitle>
     *   </doc>
     *   ..
     *   .
     * </resultset>
     *
     * Attribute values and field values are XML-escaped. When there are
     * no hits spanstart and spanend are emitted as 0 (the "%d" conversion
     * of the empty string).
     *
     * @return string Result set
     */
    function getXmlResultSet() {
        // $_SERVER replaces $HTTP_SERVER_VARS, which no longer exists in current PHP.
        $host = isset($_SERVER["HTTP_HOST"]) ? $_SERVER["HTTP_HOST"] : "";
        $script = isset($_SERVER["SCRIPT_NAME"]) ? $_SERVER["SCRIPT_NAME"] : "";
        $address = "http://" . $host . $script;

        $spanstart = $this->offset + 1;
        if ($this->numhitstotal == 0) {
            $spanend = "";
            $spanstart = "";
        } elseif ($this->numhits < $this->hitsperset) {
            // Last, partially filled page.
            $spanend = $this->offset + $this->numhits;
        } else {
            $spanend = $this->offset + $this->hitsperset;
        }

        $fields = is_array($this->resultfields) ? implode(" ", $this->resultfields) : "";

        $retval = '<?xml version="1.0" encoding="utf-8"?>' . "\n";
        $retval .= sprintf(
            '<resultset address="%s" query="%s" sortorder="%s" fields="%s" hits="%d" totalhits="%d" hitsperset="%d" offset="%d" spanstart="%d" spanend="%d" timespent="%s">',
            $this->xmlEscape($address),
            $this->xmlEscape($this->query),
            $this->xmlEscape($this->sortorder),
            $this->xmlEscape($fields),
            $this->numhits,
            $this->numhitstotal,
            $this->hitsperset,
            $this->offset,
            $spanstart,
            $spanend,
            $this->xmlEscape($this->timespent)) . "\n";

        // Document ids continue from the offset, independent of the array keys.
        $i = $this->offset;
        if (is_array($this->resultset)) {
            foreach ($this->resultset as $result) {
                $retval .= ' <doc id="' . ++$i . '">' . "\n";
                foreach ($result as $key => $val) {
                    $retval .= "  <" . $key . ">" . $this->xmlEscape($val) . "</" . $key . ">\n";
                }
                $retval .= " </doc>\n";
            }
        }
        $retval .= "</resultset>\n";
        return $retval;
    }
}
?>

--- NEW FILE: indexUtils.inc ---
<?php
/*
 * This file is part of WERA.
 *
 * Copyright (C) 2001-2002 Royal Library in Stockholm,
 *                         Royal Library in Copenhagen,
 *                         Helsinki University Library of Finland,
 *                         National Library of Norway,
 *                         National and University Library of Iceland.
 *
 * WERA is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * WERA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WERA; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * Some indexers ignore non-letter characters like : / + etc.
 * In order to enable query for words including such
 * characters some parts of the documents has
 * to be encoded before indexing is done.
 * Encoding then also has to be done to the parts of the
 * query string that queries the encoded indexes.
 *
 * $Id: indexUtils.inc,v 1.1 2005/10/04 22:59:28 stack-sf Exp $
 *
 */

/**
 * Index encoding
 *
 * URL-encode according to RFC1738 and encode also "-", "_" and "."
 * Replaces '%' with 'INDX'
 *
 * Make sure that your input is not already urlencoded before using this.
 *
 * Note: the encoding is not reversible for input that itself contains
 * the literal marker text (e.g. "INDXDOT").
 *
 * @param  string string to be encoded
 * @return string encoded string
 */
function index_encode($input) {
    // Replacements are applied in order; '%' goes first, and none of the
    // markers contain a character that a later replacement would touch.
    return str_replace(
        array("%", ".", "-", "_"),
        array("INDX", "INDXDOT", "INDXDASH", "INDXUNDERLINE"),
        rawurlencode($input));
}

/**
 * Index decoding
 *
 * Decode Index encoded strings
 *
 * @param  string string to be decoded
 * @return string decoded string
 */
function index_decode($input) {
    // The long markers must be resolved before the bare "INDX" prefix.
    $output = str_replace(
        array("INDXDOT", "INDXDASH", "INDXUNDERLINE"),
        array(".", "-", "_"),
        $input);
    return rawurldecode(str_replace("INDX", "%", $output));
}

/**
 * Time conversion
 *
 * Converts UTC time of format YYYYMMDDHH24MISS (e.g. 20020101120000)
 * to integer i.e. number of seconds since the Unix Epoch (January 1 1970 00:00:00 GMT).
 * Use this function if your search engine supports only 32-bit unsigned integers.
 *
 * If the time input string is not exactly 14 digits, input is returned unaltered.
 *
 * @param  time   Time to be converted
 * @return string Time in seconds since unix epoch
 */
function convert_time_to_int($time) {
    if (!is_string($time) && !is_int($time)) {
        return $time;
    }
    // Requiring digits keeps non-numeric text away from gmmktime().
    if (!preg_match('/^[0-9]{14}$/', (string) $time)) {
        return $time;
    }
    $time = (string) $time;
    return gmmktime(
        (int) substr($time, 8, 2),
        (int) substr($time, 10, 2),
        (int) substr($time, 12, 2),
        (int) substr($time, 4, 2),
        (int) substr($time, 6, 2),
        (int) substr($time, 0, 4));
}
?>

--- NEW FILE: fast.inc ---
<?php
/*
 * This file is part of WERA.
 *
 * Copyright (C) 2001-2002 Royal Library in Stockholm,
 *                         Royal Library in Copenhagen,
 *                         Helsinki University Library of Finland,
 *                         National Library of Norway,
 *                         National and University Library of Iceland.
 *
 * WERA is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * WERA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WERA; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * File: fast.inc
 *
 * This file is used by the fastSearch() class.
 * It has to reside in the $conf_index_path given in config.inc
 *
 *
 * $Id: fast.inc,v 1.1 2005/10/04 22:59:28 stack-sf Exp $
 */

// Guarded so that loading more than one search back end does not
// redeclare the shared class and helper functions.
if (!class_exists('indexSearch')) {
    include_once("$conf_searchenginepath/indexSearch.inc");
}
if (!function_exists('index_encode')) {
    include_once("$conf_searchenginepath/indexUtils.inc");
}

/**
 * Convert time string to ISO8601
 *
 * In  YYYYMMDDhhmmss
 * Out YYYY-MM-DDThh:mm:ssZ
 *
 * @param  string Time string of exactly 14 characters
 * @return string ISO8601 time, or the empty string if the input
 *                is not 14 characters long
 */
function HumanTimeToISOTime($time) {
    if (strlen($time) != 14) {
        return '';
    }
    return substr($time, 0, 4) . '-' . substr($time, 4, 2) . '-' . substr($time, 6, 2)
        . 'T' . substr($time, 8, 2) . ':' . substr($time, 10, 2) . ':' . substr($time, 12, 2)
        . "Z";
}

/**
 * Class for querying FAST Data Search
 *
 * For further information about constructing
 * queries and setting parameters for the fastSearch
 * class see Fast Data Search Documentation
 *
 */
class fastSearch extends indexSearch {

    // Fast specific variables
    var $host;
    var $port;
    var $uri;
    var $querytype;
    var $hosturl;
    var $fastadaptedquery;
    var $offset;
    var $hitsperset;
    var $sort = array("byfield" => "", "sortdirection" => "");
    var $resultfieldstring = " ";
    var $filter_query = "";

    /**
     * Constructor.
     * Set some initial values
     *
     * Reads $fds_conf_host, $fds_conf_port and $fds_conf_filter_query
     * from fastconfig.inc.
     */
    function __construct() {
        include ("fastconfig.inc");
        $this->host = $fds_conf_host;
        $this->port = $fds_conf_port;
        $this->filter_query = $fds_conf_filter_query;
        $this->query = "";
        $this->numhits = 0;
        $this->hitsperset = 10;
        $this->offset = 0;
        $this->timespent = 0;
        $this->setQueryType('adv');
    }

    /**
     * PHP 4 style constructor, kept for code that calls it explicitly.
     */
    function fastSearch() {
        $this->__construct();
    }

    // FAST SPECIFIC FUNCTIONS

    /**
     * Set query type for search
     *
     * Set the query type to one of the following
     * all    : all terms must be found
     * any    : any of the query terms may give a hit
     * phrase : the exact phrase must be found
     * adv    : the query is an expression in the Fast advanced query language
     *
     * @param string Query type
     */
    function setQueryType($querytype) {
        $this->querytype = $querytype;
    }

    /**
     * Check if ready to execute query (query string non-empty)
     *
     * @return boolean true if query string non-empty, false otherwise
     */
    function isReady() {
        return $this->query != "";
    }

    /**
     * preg_replace_callback helper: index-encodes the value part
     * of a "field:value" match, leaving the field prefix untouched.
     *
     * @param  array  Regex match (1 = field prefix, 2 = value)
     * @return string Replacement text
     */
    function _encodeFieldValue($match) {
        return $match[1] . index_encode($match[2]);
    }

    /**
     * preg_replace_callback helper: rewrites the YYYYMMDDhhmmss
     * boundaries of an archivaltime expression to ISO8601.
     *
     * @param  array  Regex match (1 = prefix, 3 = lower bound,
     *                4 = separator, 6 = upper bound, 7 = closing bracket)
     * @return string Replacement text
     */
    function _isoTimeRange($match) {
        return $match[1] . HumanTimeToISOTime($match[3]) . $match[4]
            . HumanTimeToISOTime($match[6]) . $match[7];
    }

    /**
     * FAST specific
     *
     * Used for adapting NWA Search Engine Abstraction Layer queries to
     * FAST Data Search index structure.
     *
     * Time format example: 2004-11-22T11:19:18Z
     *
     * @param  string NWA query string
     * @return string url-encoded FAST query
     */
    function adaptQueryToFastIndex($querystring) {
        // Literal "nwa." schema prefix (the dot is not a wildcard).
        $querystring = str_replace("nwa.", "", $querystring);
        $querystring = str_replace("archival_time:", "archivaltime:", $querystring);
        $querystring = str_replace("%20", "INDX20", $querystring);

        // NOTE(review): SOURCE also had preg_replace("/&/", "&", ...), a no-op as
        // written. It was probably "&amp;" -> "&" before the mail archive decoded
        // the entity; confirm against the CVS original before reinstating it.

        // Values of the encoded indices must be encoded the same way as
        // at indexing time. Callbacks replace the removed /e modifier.
        $querystring = preg_replace_callback(
            '/(url:|dcformat:|framelinks:|multimedialinks:|links:|archiveidentifier:)([^ )]*)/',
            array($this, "_encodeFieldValue"),
            $querystring);
        $querystring = preg_replace_callback(
            '/(archivaltime:(\[){0,1})([0-9]*)((;){0,1})([0-9]*)((\]){0,1})/',
            array($this, "_isoTimeRange"),
            $querystring);

        // FAST index names, and "*" (encoded as INDX2A above) restored as wildcard.
        $querystring = str_replace(
            array("archiveidentifier:", "collection:", "INDX2A"),
            array("aid:", "collectionname:", "*"),
            $querystring);

        if ($this->filter_query != "") {
            $querystring = $this->filter_query . " and (" . $querystring . ")";
        }
        return urlencode($querystring);
    }

    // FUNCTIONS OVERRIDING PARENT CLASS FUNCTIONS

    /**
     * Sets the sortorder
     * legal values for sortorder is:
     * "ascending" and "descending"
     * any other value will be treated as relevance ranking
     *
     * @param string Sort order
     */
    function setSortOrder($sortorder) {
        $this->sortorder = $sortorder;
        if ($sortorder == "ascending" or $sortorder == "descending") {
            $this->sort["sortdirection"] = $sortorder;
            $this->sort["byfield"] = "archivaltime";
        } else {
            // Back to relevance ranking, also after an earlier sorted query.
            $this->sort["sortdirection"] = "";
            $this->sort["byfield"] = "";
        }
    }

    /**
     * Set result set fields
     *
     * Use this to set which fields to include
     * in the result set returned by the getResultSet
     * and getXmlResultSet methods.
     *
     * The list is stored space-separated and space-padded so that whole
     * field names can be matched (see the " links " lookup in doQuery).
     *
     * @param string Fields in result set
     */
    function setFieldsInResult($fields) {
        $this->resultfieldstring = " " . str_replace(",", " ", $fields) . " ";
    }

    /**
     * Set query string for search
     *
     * @param string Query string
     */
    function setQuery($querystring) {
        $this->query = $querystring;
        $this->fastadaptedquery = $this->adaptQueryToFastIndex($this->query);
    }

    /**
     * Store one field of one hit in the result set, if it was requested.
     *
     * @param integer Hit number (result set key)
     * @param string  Field name as delivered by FAST
     * @param string  Field value
     */
    function _storeHitField($hitno, $fieldname, $value) {
        // Record delimiters and raw index fields that are never returned.
        $ignored = array("###", "##/", "docvector", "docid", "collection", "aid",
            "framelinks", "multimedialinks", "links", "dcidentifier", "dcformat", "url");
        if ((string) $fieldname === "" || in_array($fieldname, $ignored, true)) {
            return;
        }

        // FAST field => array(text looked up in the requested fields, result key)
        $views = array(
            "aidview"             => array('archiveidentifier', 'archiveidentifier'),
            "urlview"             => array('url', 'url'),
            "collectionname"      => array('collection', 'collection'),
            "dcdate"              => array('dcdate', 'dcdate'),
            "archivaltime"        => array('archival_time', 'archival_time'),
            "dcidentifierview"    => array('dcidentifier', 'dcidentifier'),
            "dcformatview"        => array('dcformat', 'dcformat'),
            // Padded so "links" does not match "framelinks"/"multimedialinks".
            "linksview"           => array(' links ', 'links'),
            "framelinksview"      => array('framelinks', 'framelinks'),
            "multimedialinksview" => array('multimedialinks', 'multimedialinks'),
            "encodingview"        => array('encoding', 'encoding'),
        );
        if (isset($views[$fieldname])) {
            list($needle, $key) = $views[$fieldname];
        } else {
            $needle = $fieldname;
            $key = $fieldname;
        }
        if (stristr((string) $this->resultfieldstring, $needle) === false) {
            return;
        }

        if ($fieldname == "dcdate" || $fieldname == "archivaltime") {
            // ISO8601 back to YYYYMMDDhhmmss
            $value = str_replace(array('-', ':', 'Z', 'T'), '', $value);
        } elseif ($fieldname == "dcformatview") {
            $value = str_replace(' ', '', $value);
        }
        $this->resultset[$hitno][$key] = $value;
    }

    /**
     * Executes the query against the FAST server and fills the result set.
     *
     * The response is read line by line: a header section with "#XXX value"
     * lines terminated by a "###/" line, followed by one record per hit,
     * each terminated by a "###/" line.
     *
     * @return boolean False if error (see getErrorMessage)
     */
    function doQuery() {
        $this->uri = "http://" . $this->host . ":" . $this->port . "/cgi-bin/asearch?type=" . $this->querytype
            . "&query=" . $this->fastadaptedquery . "&offset=" . $this->offset . "&hits=" . $this->hitsperset;
        if ($this->sort["byfield"] != "") {
            $this->uri .= "&sortby=" . $this->sort["byfield"] . "&sortdirection=" . $this->sort["sortdirection"];
        }

        $lines = @file($this->uri);
        if (!$lines) {
            $this->errormsg = "Could not connect to server: " . $this->host . ":" . $this->port;
            return false;
        }

        if (substr($lines[0], 0, 4) == "#ERC") {
            $errcode = substr($lines[0], 5);
            $errtext = isset($lines[1]) ? substr($lines[1], 5) : "";
            $this->errormsg = $errcode . " : " . $errtext;
            return false;
        }

        $linecount = count($lines);
        $linenumber = 0;
        $firsthit = null;
        $lasthit = null;

        // Header section. Bounded by the line count so a truncated
        // response cannot loop forever.
        while ($linenumber < $linecount) {
            $linestartstring = substr($lines[$linenumber], 0, 4);
            $restofline = trim(substr($lines[$linenumber], 5));
            switch ($linestartstring) {
                case "#HTS":
                    $this->numhits = (int) $restofline;
                    break;
                case "#CNT":
                    $this->numhitstotal = (int) $restofline;
                    break;
                case "#TIM":
                    $this->timespent = $restofline;
                    break;
                case "#FIR":
                    $firsthit = (int) $restofline;
                    break;
                case "#LAS":
                    $lasthit = (int) $restofline;
                    break;
            }
            $linenumber++;
            if ($linestartstring == "###/") {
                break;
            }
        }

        if ($this->numhits > 0) {
            // Fall back to the requested window if the server did not say.
            if ($firsthit === null) {
                $firsthit = $this->offset + 1;
            }
            if ($lasthit === null) {
                $lasthit = $firsthit + $this->numhits - 1;
            }

            for ($i = $firsthit; $i <= $lasthit && $linenumber < $linecount; $i++) {
                while ($linenumber < $linecount) {
                    $line = $lines[$linenumber];
                    $first4chars = substr($line, 0, 4);
                    $firstspacepos = strpos($line, " ");
                    if ($firstspacepos === false) {
                        // Delimiter lines have no value: drop the leading '#'
                        // and the trailing line break to get the name.
                        $fieldname = substr($line, 1, -1);
                        $restofline = trim($line);
                    } else {
                        $fieldname = substr($line, 1, $firstspacepos - 1);
                        $restofline = trim(substr($line, $firstspacepos));
                    }
                    $this->_storeHitField($i, $fieldname, $restofline);
                    $linenumber++;
                    if ($first4chars == "###/") {
                        break;
                    }
                }
            }
        }
        return true;
    }
}
?>

--- NEW FILE: nutch.inc ---
<?php
/*
 * This file is part of WERA.
 *
 * Copyright (C) 2001-2002 Royal Library in Stockholm,
 *                         Royal Library in Copenhagen,
 *                         Helsinki University Library of Finland,
 *                         National Library of Norway,
 *                         National and University Library of Iceland.
 *
 * WERA is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * WERA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WERA; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * Class for querying the Nutch Search Engine
 * implementation of http://archive-access.sourceforge.net
 *
 * $Id: nutch.inc,v 1.1 2005/10/04 22:59:28 stack-sf Exp $
 */

// Guarded so that loading more than one search back end does not
// redeclare the shared class and helper functions.
if (!class_exists('indexSearch')) {
    include_once("$conf_searchenginepath/indexSearch.inc");
}
if (!function_exists('index_encode')) {
    include_once("$conf_searchenginepath/indexUtils.inc");
}

/**
 * Simple function to replicate PHP 5 behaviour
 *
 * @return float Current Unix timestamp with microseconds
 */
function microtime_float() {
    list($usec, $sec) = explode(" ", microtime());
    return (float) $usec + (float) $sec;
}

/**
 * Class for querying Nutch
 *
 * For further information about constructing
 * queries and setting parameters for the nutchSearch
 * class see Nutch Documentation
 *
 */
class nutchSearch extends indexSearch {

    // Nutch specific variables
    var $searchengineurl;
    var $ranking;
    var $xml_parser;       // Object Reference to xml parser
    var $xml_elementname;  // Name of the most recently opened element
    var $xml_depth = 0;    // Current element nesting depth
    var $hitno;
    var $queryurl;
    var $mime = array();
    var $arc = array();
    var $querytype;
    var $fastadaptedquery;
    var $sort = "";
    var $debug;
    var $supressduplicates;
    var $aid_prefix;
    var $aid_suffix;

    /**
     * Constructor.
     * Set some initial values
     *
     * Reads $conf_searchengine_url, $conf_aid_prefix, $conf_aid_suffix
     * and $conf_debug from config.inc.
     */
    function __construct() {
        include ("config.inc");
        $this->searchengineurl = $conf_searchengine_url;
        $this->aid_prefix = $conf_aid_prefix;
        $this->aid_suffix = $conf_aid_suffix;
        $this->debug = $conf_debug;
        $this->query = "";
        $this->numhits = 0;
        $this->numhitstotal = 0;
        $this->hitsperset = 10;
        $this->offset = 0;
        $this->timespent = 0;
        $this->ranking = 0;
        $this->setQueryType('adv');
        $this->unsetSupressDuplicates();
    }

    /**
     * PHP 4 style constructor, kept for code that calls it explicitly.
     */
    function nutchSearch() {
        $this->__construct();
    }

    // NUTCH SPECIFIC FUNCTIONS

    /**
     * Set query type for search
     *
     * Set the query type to one of the following
     * all    : all terms must be found
     * any    : any of the query terms may give a hit
     * phrase : the exact phrase must be found
     * adv    : the query is an expression in the Fast advanced query language
     *
     * Currently not in use by this class.
     *
     * @param string Query type
     */
    function setQueryType($querytype) {
        $this->querytype = $querytype;
    }

    /**
     * Check if ready to execute query (query string non-empty)
     *
     * @return boolean true if query string non-empty, false otherwise
     */
    function isReady() {
        return $this->query != "";
    }

    /**
     * Adapt an NWA query string to the Nutch query syntax.
     *
     * date:[;20041217001244]               -> date:20010909014640-20041217001244
     *   (Strange, seems that nutch cant handle dates before 20010909014640)
     * date:[20041217001244;]               -> date:20041217001244-[now]
     * date:[20041217001244;20050413121109] -> date:20041217001244-20050413121109
     *
     * @param  string NWA query string
     * @return string Query string for the Nutch url
     */
    function adaptQuery($querystring) {
        $querystring = str_replace("nwa.", "", $querystring);
        $querystring = str_replace("url:", "exacturl:", $querystring);
        $querystring = str_replace("archival_time:", "date:", $querystring);

        $querystring = str_replace("[;", "20010909014640-", $querystring);
        // An open upper bound means "until now", so use the current UTC time.
        $querystring = str_replace(";]", "-" . gmdate("YmdHis"), $querystring);
        $querystring = str_replace(";", "-", $querystring);
        $querystring = str_replace("[", "", $querystring);
        $querystring = str_replace("]", "", $querystring);

        $querystring = str_replace(" ", "%20", $querystring);
        $querystring = str_replace("?", "0x3f", $querystring);
        $querystring = str_replace("=", "0x3d", $querystring);
        $querystring = str_replace("&", "0x26", $querystring);
        return $querystring;
    }

    // FUNCTIONS OVERRIDING PARENT CLASS FUNCTIONS

    /**
     * Sets the sortorder
     * legal values for sortorder is:
     * "ascending" and "descending"
     * any other value will be treated as relevance ranking
     *
     * e.g &dedupField=date&hitsPerDup=100&sort=date
     *
     * @param string Sort order
     */
    function setSortOrder($sortorder) {
        $this->sortorder = $sortorder;
        if ($sortorder == "ascending" or $sortorder == "descending") {
            $this->sort = "&dedupField=date&sort=date";
            if ($sortorder == "descending") {
                $this->sort .= "&reverse=true";
            }
        } else {
            // Back to relevance ranking, also after an earlier sorted query.
            $this->sort = "";
        }
    }

    /**
     * Set suppress duplicate urls
     */
    function setSupressDuplicates() {
        $this->supressduplicates = "&hitsPerDup=1&dedupField=exacturl";
    }

    /**
     * Unset suppress duplicate urls
     */
    function unsetSupressDuplicates() {
        $this->supressduplicates = "&hitsPerDup=0";
    }

    /**
     * Set result set fields
     *
     * Use this to set which summary fields to include
     * in the result set returned by the getResultSet
     * and getXmlResultSet methods.
     *
     * @param string Fields in result set (comma or whitespace separated)
     */
    function setFieldsInResult($fields) {
        $this->resultfields = preg_split("/[\s,]+/", trim($fields));
    }

    /**
     * Set query string for search
     *
     * @param string Query string
     */
    function setQuery($querystring) {
        $this->query = $querystring;
    }

    /**
     * Execute the query
     *
     * Fetches the result XML from the search engine and parses it into
     * the result set, keyed by hit number (offset + 1, offset + 2, ...).
     *
     * @return boolean False if the query is empty, the server could not be
     *                 reached or the response is not well-formed XML
     *                 (see getErrorMessage)
     */
    function doQuery() {
        // Start from a clean slate so nothing leaks in from an earlier query.
        $this->resultset = array();
        $this->mime = array();
        $this->arc = array();
        $time_start = microtime_float();

        $this->queryurl = $this->searchengineurl . "?query=" . $this->adaptQuery($this->query)
            . "&start=" . $this->offset . "&hitsPerPage=" . $this->hitsperset . $this->supressduplicates;
        if ($this->sort != "") {
            $this->queryurl .= $this->sort;
        }
        if ($this->debug == 1) {
            print $this->queryurl;
        }

        if (!$this->isReady()) {
            $this->errormsg = "Empty query";
            return false;
        }

        $this->hitno = $this->offset;
        $this->numhits = 0;
        $this->numhitstotal = 0;
        $this->xml_depth = 0;
        $this->xml_elementname = "";

        $data = file_get_contents($this->queryurl);
        if ($data === false) {
            $this->errormsg = "Could not connect to server: " . $this->searchengineurl;
            return false;
        }

        // Array callables bind the handlers to this object.
        $this->xml_parser = xml_parser_create();
        xml_set_element_handler($this->xml_parser,
            array($this, "startElement"), array($this, "endElement"));
        xml_set_character_data_handler($this->xml_parser, array($this, "characterData"));

        $parsed = xml_parse($this->xml_parser, $data, true);
        if (!$parsed) {
            $this->errormsg = sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($this->xml_parser)),
                xml_get_current_line_number($this->xml_parser));
        }
        // Only the resource form of the parser needs freeing explicitly.
        if (is_resource($this->xml_parser)) {
            xml_parser_free($this->xml_parser);
        }
        $this->xml_parser = null;
        if (!$parsed) {
            return false;
        }

        if ($this->numhitstotal > 0) {
            $wantformat = in_array("dcformat", $this->resultfields);
            $wantaid = in_array("archiveidentifier", $this->resultfields);
            foreach (array_keys($this->resultset) as $key) {
                if ($wantformat) {
                    $primary = isset($this->mime[$key]['primary']) ? $this->mime[$key]['primary'] : "";
                    $sub = isset($this->mime[$key]['sub']) ? $this->mime[$key]['sub'] : "";
                    $this->resultset[$key]['dcformat'] = $primary . "/" . $sub;
                }
                if ($wantaid) {
                    $arcoffset = isset($this->arc[$key]['offset']) ? $this->arc[$key]['offset'] : "";
                    $arcname = isset($this->arc[$key]['name']) ? $this->arc[$key]['name'] : "";
                    $this->resultset[$key]['archiveidentifier'] =
                        $arcoffset . "/" . $this->aid_prefix . $arcname . $this->aid_suffix;
                }
            }
        }

        $this->timespent = (microtime_float() - $time_start);

        if ($this->debug == 1) {
            print "\n<!--";
            print $this->queryurl;
            print_r($this->resultset);
            print "-->\n";
        }
        return true;
    }

    /**
     * XML parser callback: element opened.
     *
     * Every ITEM element starts a new hit.
     */
    function startElement($parser, $name, $attrs) {
        $this->xml_depth++;
        $this->xml_elementname = $name;
        if ($this->xml_elementname == "ITEM") {
            $this->hitno++;
            // Hits in this result set, not the absolute hit number.
            $this->numhits++;
        }
    }

    /**
     * XML parser callback: element closed.
     */
    function endElement($parser, $name) {
        $this->xml_depth--;
    }

    /**
     * Append character data to a field of the current hit.
     *
     * The parser may deliver the text of one element in several
     * pieces, so values are accumulated.
     *
     * @param array  Store to append to (passed by reference)
     * @param string Field name
     * @param string Character data
     */
    function _appendToHit(&$store, $field, $data) {
        if (!isset($store[$this->hitno][$field])) {
            $store[$this->hitno][$field] = "";
        }
        $store[$this->hitno][$field] .= $data;
    }

    /**
     * XML parser callback: character data.
     *
     * Picks up the OpenSearch totals and, for elements at nesting
     * depth 4, the requested fields of the current hit.
     */
    function characterData($parser, $data) {
        if (trim($data) != "") {
            switch ($this->xml_elementname) {
                case "OPENSEARCH:TOTALRESULTS":
                    $this->numhitstotal = trim($data);
                    break;
                case "OPENSEARCH:STARTINDEX":
                    $this->offset = trim($data);
                    break;
            }
        }

        if ($this->xml_depth != 4) {
            return;
        }

        switch ($this->xml_elementname) {
            case "TITLE":
                if (in_array("title", $this->resultfields)) {
                    $this->_appendToHit($this->resultset, 'dctitle', $data);
                }
                if (in_array("teaser", $this->resultfields)) {
                    $this->_appendToHit($this->resultset, 'teaser', $data);
                }
                break;
            case "NUTCH:ARCDATE":
                if (in_array("archival_time", $this->resultfields)) {
                    $this->_appendToHit($this->resultset, 'archival_time', $data);
                }
                break;
            case "DESCRIPTION":
                if (in_array("description", $this->resultfields)) {
                    $this->_appendToHit($this->resultset, 'description', $data);
                }
                break;
            case "LINK":
                if (in_array("url", $this->resultfields)) {
                    $this->_appendToHit($this->resultset, 'url', $data);
                }
                break;
            case "NUTCH:ARCNAME":
                if (in_array("archiveidentifier", $this->resultfields)) {
                    $this->_appendToHit($this->arc, 'name', $data);
                }
                break;
            case "NUTCH:ARCOFFSET":
                if (in_array("archiveidentifier", $this->resultfields)) {
                    $this->_appendToHit($this->arc, 'offset', $data);
                }
                break;
            case "NUTCH:PRIMARYTYPE":
                if (in_array("dcformat", $this->resultfields)) {
                    $this->_appendToHit($this->mime, 'primary', $data);
                }
                break;
            case "NUTCH:SUBTYPE":
                if (in_array("dcformat", $this->resultfields)) {
                    $this->_appendToHit($this->mime, 'sub', $data);
                }
                break;
        }
    }
}
?>
|