From: <re...@us...> - 2003-12-31 02:06:25
|
Update of /cvsroot/tikiwiki/tiki/lib/search In directory sc8-pr-cvs1:/tmp/cvs-serv16646/lib/search Modified Files: Tag: BRANCH-1-8 searchlib.php Log Message: syllable search works now. Index: searchlib.php =================================================================== RCS file: /cvsroot/tikiwiki/tiki/lib/search/searchlib.php,v retrieving revision 1.10.2.2 retrieving revision 1.10.2.3 diff -u -d -r1.10.2.2 -r1.10.2.3 --- searchlib.php 30 Dec 2003 23:20:34 -0000 1.10.2.2 +++ searchlib.php 31 Dec 2003 02:06:19 -0000 1.10.2.3 @@ -31,8 +31,201 @@ } function &find($where,$words,$offset, $maxRecords) { - return($this->find_exact($where,$words,$offset, $maxRecords)); + $exact=$this->find_exact($where,$words,$offset, $maxRecords); + $part=$this->find_part($where,$words,$offset, $maxRecords); + $res=array(); + $res["data"]=array_merge($exact["data"],$part["data"]); + $res["cant"]=$exact["cant"]+$part["cant"]; + return $res; } + + + function &find_part($where,$words,$offset, $maxRecords) { + $words=preg_split("/[\W]+/",$words,-1,PREG_SPLIT_NO_EMPTY); + if (count($words)>0) { + switch($where) { + case "wikis": + return $this->find_part_wiki($words,$offset, $maxRecords); + break; + case "forums": + return $this->find_part_forums($words,$offset, $maxRecords); + break; + case "articles": + return $this->find_part_articles($words,$offset, $maxRecords); + break; + case "blogs": + return $this->find_part_blogs($words,$offset, $maxRecords); + break; + case "posts": + return $this->find_part_blog_posts($words,$offset, $maxRecords); + break; + case "faqs": + return $this->find_part_faqs($words,$offset, $maxRecords); + break; + case "directory": + return $this->find_part_directory($words,$offset, $maxRecords); + break; + case "galleries": + return $this->find_part_imggals($words,$offset, $maxRecords); + break; + case "images": + return $this->find_part_img($words,$offset, $maxRecords); + break; + case "trackers": + return $this->find_part_trackers($words,$offset, $maxRecords); + 
break; + + default: + return $this->find_part_all($words,$offset, $maxRecords); + break; + } + } + } + + function refresh_lru_wordlist($syllable) { + // delete from wordlist and lru list + $this->query("delete from `tiki_searchwords` where `syllable`=?",array($syllable),-1,-1,false); + $this->query("delete from `tiki_searchsyllable` where `syllable`=?",array($syllable),-1,-1,false); + // search the searchindex - can take long time + $ret=array(); + $query="select `searchword` from `tiki_searchindex` where `searchword` like ?"; + $result=$this->query($query,array('%'.$syllable.'%')); + while ($res = $result->fetchRow()) { + $ret[]=$res["searchword"]; + } + // cache this long running query + foreach($ret as $searchword) { + $this->query("insert into `tiki_searchwords` (`syllable`,`searchword`) values (?,?)",array($syllable,$searchword),-1,-1,false); + } + // set lru list parameters + $now=time(); + $this->query("insert into `tiki_searchsyllable`(`syllable`,`lastUsed`,`lastUpdated`) values (?,?,?)", + array($syllable,(int) $now,(int) $now)); + + // at random rate: check length of lru list and purge these that + // have not been used for long time. This is what a lru list + // basically does + $n=5; //todo: make it configurable + list($usec, $sec) = explode(" ",microtime()); + srand (ceil($sec+100*$usec)); + if(rand(1,$n)==1) { + $lrulength=$this->getOne("select count(*) from `tiki_searchsyllable`",array()); + if ($lrulength > 100) { // only purge if lru list is long. todo: make configurable + //purge oldest + $diff=$lrulength-100; + $oldwords=array(); + $query="select `syllable` from `tiki_searchsyllable` order by `lastUsed` asc"; + $result=$this->query($query,array(),$diff); + while ($res = $result->fetchRow()) { + //we probably cannot delete now. 
to avoid database deadlocks + //we save the words and delete later + $oldwords[]=$res["syllable"]; + } + foreach($oldwords as $oldword) { + $this->query("delete from `tiki_searchwords` where `syllable`=?",array($oldword),-1,-1,false); + $this->query("delete from `tiki_searchsyllable` where `syllable`=?",array($oldword),-1,-1,false); + } + + } + } + return $ret; + } + + + function &get_wordlist_from_syllables($syllables) { + $ret=array(); + foreach($syllables as $syllable) { + //Have a look at the lru list (tiki_searchsyllable) + $bindvars=array($syllable); + $age=time()-$this->getOne("select `lastUpdated` from `tiki_searchsyllable` where `syllable`=?",$bindvars); + if(!$age || $age>172800) {// older than 2 days. to be configured + $a=$this->refresh_lru_wordlist($syllable); + $ret=array_merge($ret,$a); + } else { + + // get wordlist + $query="select `searchword` from `tiki_searchwords` where `syllable`=?"; + $result=$this->query($query,$bindvars); + while ($res = $result->fetchRow()) { + $ret[]=$res["searchword"]; + } + } + + // update lru list status + $now=time(); + $this->query("update `tiki_searchsyllable` set `lastUsed`=? 
where `syllable`=?",array((int) $now,$syllable)); + } + return $ret; + } + + function &find_part_wiki($words,$offset, $maxRecords) { + return $this->find_exact_wiki($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_articles($words,$offset, $maxRecords) { + return $this->find_exact_articles($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_forums($words,$offset, $maxRecords) { + return $this->find_exact_forums($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_blogs($words,$offset, $maxRecords) { + return $this->find_exact_blogs($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_blog_posts($words,$offset, $maxRecords) { + return $this->find_exact_blog_posts($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_faqs($words,$offset, $maxRecords) { + return $this->find_exact_faqs($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_directory($words,$offset, $maxRecords) { + return $this->find_exact_directory($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_imggals($words,$offset, $maxRecords) { + return $this->find_exact_imggals($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_img($words,$offset, $maxRecords) { + return $this->find_exact_img($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + function &find_part_trackers($words,$offset, $maxRecords) { + return $this->find_exact_trackers($this->get_wordlist_from_syllables($words),$offset, $maxRecords); + } + + + + function &find_part_all($words,$offset, $maxRecords) { + $wikiresults=$this->find_part_wiki($words,$offset, $maxRecords); + $artresults=$this->find_part_articles($words,$offset, $maxRecords); + $forumresults=$this->find_part_forums($words,$offset, 
$maxRecords); + $blogresults=$this->find_part_blogs($words,$offset, $maxRecords); + $blogpostsresults=$this->find_part_blog_posts($words,$offset, $maxRecords); + $faqresults=$this->find_part_faqs($words,$offset, $maxRecords); + $dirresults=$this->find_part_directory($words,$offset, $maxRecords); + $imggalsresults=$this->find_part_imggals($words,$offset, $maxRecords); + $imgresults=$this->find_part_img($words,$offset, $maxRecords); + $trackerresults=$this->find_part_trackers($words,$offset, $maxRecords); + + //merge the results + $res=array(); + $res["data"]=array_merge($wikiresults["data"],$artresults["data"], + $blogresults["data"],$faqresults["data"], + $blogpostsresults["data"],$forumresults["data"], + $dirresults["data"],$imggalsresults["data"], + $imgresults["data"],$trackerresults["data"]); + $res["cant"]=$wikiresults["cant"]+$artresults["cant"]+ + $blogresults["cant"]+$faqresults["cant"]+ + $blogpostsresults["cant"]+$forumresults["cant"]+ + $dirresults["cant"]+$imggalsresults["cant"]+ + $imgresults["cant"]+$trackerresults["cant"]; + return ($res); + } function &find_exact($where,$words,$offset, $maxRecords) { $words=preg_split("/[\W]+/",$words,-1,PREG_SPLIT_NO_EMPTY); |