From: Sverre B. <sv...@us...> - 2005-10-04 23:40:54
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv865/src/webapps/wera/lib/seal Modified Files: nutch.inc indexSearch.inc Log Message: Index: indexSearch.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal/indexSearch.inc,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** indexSearch.inc 4 Oct 2005 22:59:28 -0000 1.1 --- indexSearch.inc 4 Oct 2005 23:40:45 -0000 1.2 *************** *** 140,145 **** * to dilineate one expression from another. For example, in the query * ! * nwa.dcdate:998690406 AND (nwa.title:"ouagadougou" ! * OR nwa.title:"capitol of burkina fazo"), * * parentheses group the ORs together so they are as a distinct --- 140,145 ---- * to dilineate one expression from another. For example, in the query * ! * dcdate:998690406 AND (title:"ouagadougou" ! * OR title:"capitol of burkina fazo"), * * parentheses group the ORs together so they are as a distinct Index: nutch.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/seal/nutch.inc,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** nutch.inc 4 Oct 2005 22:59:28 -0000 1.1 --- nutch.inc 4 Oct 2005 23:40:45 -0000 1.2 *************** *** 52,56 **** // Nutch specific variables var $searchengineurl; - var $ranking; var $xml_parser; // Object Reference to xml parser var $xml_elementname; --- 52,55 ---- *************** *** 60,65 **** var $mime = array(); var $arc = array(); - var $querytype; - var $fastadaptedquery; var $sort; var $debug; --- 59,62 ---- *************** *** 81,86 **** $this->offset = 0; $this->timespent = 0; - $this->ranking = 0; - $this->setQueryType('adv'); $this->unsetSupressDuplicates(); } --- 78,81 ---- *************** *** 89,108 **** // NUTCH SPECIFIC FUNCTIONS - - /** - * Set query type for search - * - * Set the query type to one of the following - * all : all terms must be found - * any : any of the query terms may give a hit - * phrase : the exact phrase must be found - * adv : the query is an expression in the Fast advanced query language - * - * @param integer Query type - */ - function setQueryType($querytype) { - // Currently not in use by this class !S - $this->querytype = $querytype; - } /** --- 84,87 ---- *************** *** 121,141 **** function adaptQuery($querystring) { - - $querystring = str_replace("nwa.", "", $querystring); - $querystring = str_replace("url:", "exacturl:", $querystring); - $querystring = str_replace("archival_time:", "date:", $querystring); - # date:[;20041217001244] -> date:20010909014640-20041217001244 - # Strange, seems that nutch cant handle dates before 20010909014640 - # date:[20041217001244;] -> date:20041217001244-[now] - # date:[20041217001244;20050413121109] -> 20041217001244-20050413121109 - $querystring = str_replace("[;", "20010909014640-", $querystring); - $querystring = str_replace(";]", "-20100101000000", $querystring); - $querystring = str_replace(";", "-", $querystring); - $querystring = str_replace("[", "", $querystring); - $querystring = str_replace("]", "", $querystring); $querystring = str_replace(" ", "%20", $querystring); - $querystring = str_replace("?", "0x3f", $querystring); - $querystring = str_replace("=", "0x3d", $querystring); - $querystring = str_replace("&", "0x26", $querystring); return $querystring; } --- 100,104 ---- *************** *** 185,189 **** */ function setFieldsInResult($fields) { - #print "<!-- Resultsetfields : $fields -->"; $this->resultfields = preg_split ("/[\s,]+/", trim($fields)); } --- 148,151 ---- *************** *** 196,200 **** function setQuery($querystring) { $this->query = $querystring; - //$this->fastadaptedquery = $this->adaptQueryToFastIndex($this->query); } --- 158,161 ---- *************** *** 206,209 **** --- 167,171 ---- function doQuery() { + $retval = true; unset($this->resultset); $time_start = microtime_float(); *************** *** 220,255 **** if ($this->isReady()) { - //$this->hitno = 0; $this->hitno = $this->offset; $this->xml_parser = xml_parser_create(); xml_set_object($this->xml_parser, $this); xml_set_element_handler($this->xml_parser, "startElement", "endElement"); ! xml_set_character_data_handler($this->xml_parser, "characterData"); // sverreb ! //print "\n<!--QUERY " . $this->queryurl . "-->\n"; ! $data = file_get_contents($this->queryurl); ! if (!xml_parse($this->xml_parser, $data)) { ! die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($this->xml_parser)), xml_get_current_line_number($this->xml_parser))); ! } ! xml_parser_free($this->xml_parser); ! ! if ($this->numhitstotal > 0) { ! foreach ($this->resultset as $key => $val) { ! if (in_array("dcformat", $this->resultfields)) { ! $this->resultset[$key]['dcformat'] = $this->mime[$key]['primary'] . "/" . $this->mime[$key]['sub']; ! } ! if (in_array("archiveidentifier", $this->resultfields)) { ! $this->resultset[$key]['archiveidentifier'] = $this->arc[$key]['offset'] . "/" . $this->aid_prefix . $this->arc[$key]['name'] . $this->aid_suffix; ! } ! } } ! ! $this->timespent = (microtime_float() - $time_start); ! if ($this->debug == 1) { ! print "\n<!--"; ! print $this->queryurl; ! print_r($this->resultset); ! print "-->\n"; } ! return true; } } --- 182,224 ---- if ($this->isReady()) { $this->hitno = $this->offset; $this->xml_parser = xml_parser_create(); xml_set_object($this->xml_parser, $this); xml_set_element_handler($this->xml_parser, "startElement", "endElement"); ! xml_set_character_data_handler($this->xml_parser, "characterData"); ! $data = @file_get_contents($this->queryurl); ! if ($data) { ! if (!xml_parse($this->xml_parser, $data)) { ! #die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($this->xml_parser)), xml_get_current_line_number($this->xml_parser))); ! $retval = false; ! $this->errormsg = sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($this->xml_parser)), xml_get_current_line_number($this->xml_parser)); ! } ! xml_parser_free($this->xml_parser); ! if ($retval) { ! if ($this->numhitstotal > 0) { ! foreach ($this->resultset as $key => $val) { ! if (in_array("dcformat", $this->resultfields)) { ! $this->resultset[$key]['dcformat'] = $this->mime[$key]['primary'] . "/" . $this->mime[$key]['sub']; ! } ! if (in_array("archiveidentifier", $this->resultfields)) { ! $this->resultset[$key]['archiveidentifier'] = $this->arc[$key]['offset'] . "/" . $this->aid_prefix . $this->arc[$key]['name'] . $this->aid_suffix; ! } ! } ! } ! ! $this->timespent = (microtime_float() - $time_start); ! if ($this->debug == 1) { ! print "\n<!--"; ! print $this->queryurl; ! print_r($this->resultset); ! print "-->\n"; ! } ! } } ! else { ! $retval = false; ! $this->errormsg = "Error : Failed to open stream!"; } ! return $retval; } } *************** *** 265,273 **** function endElement($parser, $name) { - /* - if ($this->xml_depth[$parser] == 4) { - $stripout = array("-", ":", "T", "Z"); - $this->resultset[$this->hitno]['archival_time'] = str_replace($stripout, "", $this->resultset[$this->hitno]['archival_time']); - }*/ $this->xml_depth[$parser]--; } --- 234,237 ---- *************** *** 289,301 **** case "TITLE": if (in_array("title", $this->resultfields)) { ! $this->resultset[$this->hitno]['dctitle'] .= $data; ! } ! if (in_array("teaser", $this->resultfields)) { ! $this->resultset[$this->hitno]['teaser'] .= $data; } break; case "NUTCH:ARCDATE": ! if (in_array("archival_time", $this->resultfields)) { ! $this->resultset[$this->hitno]['archival_time'] .= $data; } break; --- 253,262 ---- case "TITLE": if (in_array("title", $this->resultfields)) { ! $this->resultset[$this->hitno]['title'] .= $data; } break; case "NUTCH:ARCDATE": ! if (in_array("date", $this->resultfields)) { ! $this->resultset[$this->hitno]['date'] .= $data; } break; |