|
From: Simon H. <sim...@us...> - 2009-06-25 09:07:52
|
Update of /cvsroot/stack/stack-dev/lib In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv1201/lib Modified Files: KeywordFinder.php Log Message: Intergration -> Integration Index: KeywordFinder.php =================================================================== RCS file: /cvsroot/stack/stack-dev/lib/KeywordFinder.php,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** KeywordFinder.php 5 May 2008 22:32:18 -0000 1.2 --- KeywordFinder.php 25 Jun 2009 09:07:43 -0000 1.3 *************** *** 33,42 **** /** * Current keywords ! * * @var array * @access private */ private $current; ! /** * --- 33,42 ---- /** * Current keywords ! * * @var array * @access private */ private $current; ! /** * *************** *** 46,50 **** */ private $questionStem; ! /** * --- 46,50 ---- */ private $questionStem; ! /** * *************** *** 54,58 **** */ private $stemWords; ! /** * --- 54,58 ---- */ private $stemWords; ! /** * *************** *** 62,66 **** */ private $keywords; ! /** * Filters the question stem looking for words in the keyword database --- 62,66 ---- */ private $keywords; ! /** * Filters the question stem looking for words in the keyword database *************** *** 77,84 **** $this->keywords = $keywords; } ! ! /** ! * Generates keywords automatically from the question stem text. * Words are added if they are already in the keywords database, and not on a * list of ignore words --- 77,84 ---- $this->keywords = $keywords; } ! ! /** ! * Generates keywords automatically from the question stem text. * Words are added if they are already in the keywords database, and not on a * list of ignore words *************** *** 92,99 **** $db = new StackDBKeywords(); $db->connect(); ! //compare this list to words extracted from question stem $dbKeywords = $db->getAllKeywords(); ! //if there are any words to check and the db already contains keywords, then try to match up potential keywords if(!empty($this->stemWords) && !empty($dbKeywords)) --- 92,99 ---- $db = new StackDBKeywords(); $db->connect(); ! //compare this list to words extracted from question stem $dbKeywords = $db->getAllKeywords(); ! //if there are any words to check and the db already contains keywords, then try to match up potential keywords if(!empty($this->stemWords) && !empty($dbKeywords)) *************** *** 117,121 **** { //check if the word is already in the database ! if((in_array($stemWord, $dbKeywords)) || (in_array(strtolower($stemWord), $dbKeywords))) { --- 117,121 ---- { //check if the word is already in the database ! if((in_array($stemWord, $dbKeywords)) || (in_array(strtolower($stemWord), $dbKeywords))) { *************** *** 125,134 **** } } ! //return keywords in string format. return $this->keywords; } ! ! /** * Extracts the words from a comma or space seperated string, removing empty values --- 125,134 ---- } } ! //return keywords in string format. return $this->keywords; } ! ! /** * Extracts the words from a comma or space seperated string, removing empty values *************** *** 144,148 **** //split on spaces $currentKeywords = explode(' ', $keywords); ! if(!empty($currentKeywords)) { --- 144,148 ---- //split on spaces $currentKeywords = explode(' ', $keywords); ! if(!empty($currentKeywords)) { *************** *** 155,163 **** } } ! return $current; } ! ! /** * Filters out any potential keywords from the question stem. --- 155,163 ---- } } ! return $current; } ! ! /** * Filters out any potential keywords from the question stem. *************** *** 172,179 **** { $rawStem = $questionStem->getRawCasText(); ! //remove hints, html and @s from the question stem. $rawStem = str_replace('$', '_£_', $rawStem); //TODO get the proper pattern matching expression for $'s ! $stem = new stringUtil($rawStem); $rawStem = $stem->removeBetween('_£_','_£_'); --- 172,179 ---- { $rawStem = $questionStem->getRawCasText(); ! //remove hints, html and @s from the question stem. $rawStem = str_replace('$', '_£_', $rawStem); //TODO get the proper pattern matching expression for $'s ! $stem = new stringUtil($rawStem); $rawStem = $stem->removeBetween('_£_','_£_'); *************** *** 185,193 **** $rawStem = $stem->removeBetween('<IEfeedback>','</IEfeedback>'); $rawStem = $stem->removeBetween('<PRTfeedback>','</PRTfeedback>'); ! //remove punctuation $punctuation = array('.', ',', ';', ':', '!', '?', '"',"'"); $rawStem = str_replace($punctuation, '', $rawStem); ! $words = array(); $lines = explode("\n", $rawStem); --- 185,193 ---- $rawStem = $stem->removeBetween('<IEfeedback>','</IEfeedback>'); $rawStem = $stem->removeBetween('<PRTfeedback>','</PRTfeedback>'); ! //remove punctuation $punctuation = array('.', ',', ';', ':', '!', '?', '"',"'"); $rawStem = str_replace($punctuation, '', $rawStem); ! $words = array(); $lines = explode("\n", $rawStem); *************** *** 197,204 **** { $exploded = explode(' ', $line); ! if(!empty($exploded)) { ! foreach($exploded as $word) { --- 197,204 ---- { $exploded = explode(' ', $line); ! if(!empty($exploded)) { ! foreach($exploded as $word) { *************** *** 212,225 **** } } ! //find any synonyms $words = $this->findSynonyms($words); ! //remove any common or unwanted words $words = $this->filterUnwantedWords($words); ! return $words; } ! /** * Filters out any words in the ignore list from the inputted array. --- 212,225 ---- } } ! //find any synonyms $words = $this->findSynonyms($words); ! //remove any common or unwanted words $words = $this->filterUnwantedWords($words); ! return $words; } ! /** * Filters out any words in the ignore list from the inputted array. *************** *** 237,241 **** $root = $config->get('docroot'); $path = $root.'/lang/'.$lang.'/ignoreWords.txt'; ! if(file_exists($path)) { --- 237,241 ---- $root = $config->get('docroot'); $path = $root.'/lang/'.$lang.'/ignoreWords.txt'; ! if(file_exists($path)) { *************** *** 243,249 **** $ignoreArray = explode("\n", $ignoreString); $ignoreArray[0] = ''; ! //remove any of these words to ignore ! foreach($words as $word) { --- 243,249 ---- $ignoreArray = explode("\n", $ignoreString); $ignoreArray[0] = ''; ! //remove any of these words to ignore ! foreach($words as $word) { *************** *** 266,273 **** } } ! /** ! * Loads an external file /lang/XX/synonyms.txt where XX is the country code ! * & replaces found words with a standard synonym. For example integal becomes intergration. * * @param array $words --- 266,273 ---- } } ! /** ! * Loads an external file /lang/XX/synonyms.txt where XX is the country code ! * & replaces found words with a standard synonym. For example integal becomes integration. * * @param array $words *************** *** 284,288 **** $root = $config->get('docroot'); $path = $root.'/lang/'.$lang.'/synonyms.txt'; ! if(file_exists($path)) { --- 284,288 ---- $root = $config->get('docroot'); $path = $root.'/lang/'.$lang.'/synonyms.txt'; ! if(file_exists($path)) { *************** *** 292,296 **** unset($synonynmArray[0]); unset($synonynmArray[1]); ! if(!empty($synonynmArray)) { --- 292,296 ---- unset($synonynmArray[0]); unset($synonynmArray[1]); ! if(!empty($synonynmArray)) { *************** *** 298,309 **** { $exploded = explode('=', $element); ! $key = trim($exploded[0]); $val = explode(',', $exploded[1]); ! for($i=0; $i < count($val); $i++) { $val[$i] = trim($val[$i]); ! if($val[$i] != '') { --- 298,309 ---- { $exploded = explode('=', $element); ! $key = trim($exploded[0]); $val = explode(',', $exploded[1]); ! for($i=0; $i < count($val); $i++) { $val[$i] = trim($val[$i]); ! if($val[$i] != '') { *************** *** 311,315 **** } } ! } //check if one of the $words has a synonym --- 311,315 ---- } } ! } //check if one of the $words has a synonym *************** *** 329,333 **** } //if any input words have a synonym (not already in $words) replace ! return $words; } --- 329,333 ---- } //if any input words have a synonym (not already in $words) replace ! return $words; } *************** *** 336,340 **** return $words; } ! } else --- 336,340 ---- return $words; } ! } else |