|
From: Paul S. O. <ps...@us...> - 2002-03-22 17:53:32
|
Update of /cvsroot/phpbb/phpBB2/includes
In directory usw-pr-cvs1:/tmp/cvs-serv27359/includes
Modified Files:
functions_search.php
Log Message:
preg_quote stopwords
Index: functions_search.php
===================================================================
RCS file: /cvsroot/phpbb/phpBB2/includes/functions_search.php,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** functions_search.php 19 Mar 2002 00:46:22 -0000 1.5
--- functions_search.php 22 Mar 2002 17:53:28 -0000 1.6
***************
*** 24,33 ****
// Weird, $init_match doesn't work with static when double quotes (") are used...
static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', "'", '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
! static $drop_char_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", "", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " " , " ", " ", " ", " ", " ", " ");
// static $accent_match = array("ß", "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é", "ê", "ë", "ì", "í", "î", "ï", "ð", "ñ", "ò", "ó", "ô", "õ", "ö", "ø", "ù", "ú", "û", "ü", "ý", "þ", "ÿ");
// static $accent_replace = array("s", "a", "a", "a", "a", "a", "a", "a", "c", "e", "e", "e", "e", "i", "i", "i", "i", "o", "n", "o", "o", "o", "o", "o", "o", "u", "u", "u", "u", "y", "t", "y");
! $entry = " " . strip_tags(strtolower($entry)) . " ";
for($i = 0; $i < count($accent_match); $i++)
--- 24,33 ----
// Weird, $init_match doesn't work with static when double quotes (") are used...
static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', "'", '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
! static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' , ' ', ' ', ' ', ' ', ' ', ' ');
// static $accent_match = array("ß", "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é", "ê", "ë", "ì", "í", "î", "ï", "ð", "ñ", "ò", "ó", "ô", "õ", "ö", "ø", "ù", "ú", "û", "ü", "ý", "þ", "ÿ");
// static $accent_replace = array("s", "a", "a", "a", "a", "a", "a", "a", "c", "e", "e", "e", "e", "i", "i", "i", "i", "o", "n", "o", "o", "o", "o", "o", "o", "u", "u", "u", "u", "y", "t", "y");
! $entry = ' ' . strip_tags(strtolower($entry)) . ' ';
for($i = 0; $i < count($accent_match); $i++)
***************
*** 36,60 ****
}
! if( $mode == "post" )
{
// Replace line endings by a space
! $entry = preg_replace("/[\n\r]/is", " ", $entry);
// HTML entities like &nbsp;
! $entry = preg_replace("/\b&[a-z]+;\b/", " ", $entry);
// Remove URL's
! $entry = preg_replace("/\b[a-z0-9]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?/", " ", $entry);
// Quickly remove BBcode.
! $entry = preg_replace("/\[img:[a-z0-9]{10,}\].*?\[\/img:[a-z0-9]{10,}\]/", " ", $entry);
! $entry = preg_replace("/\[\/?url(=.*?)?\]/", " ", $entry);
! $entry = preg_replace("/\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]/", " ", $entry);
}
! else if( $mode == "search" )
{
! $entry = str_replace("+", " and ", $entry);
! $entry = str_replace("-", " not ", $entry);
}
// Replace numbers on their own
! $entry = preg_replace("/\b[0-9]+\b/", " ", $entry);
//
--- 36,60 ----
}
! if( $mode == 'post' )
{
// Replace line endings by a space
! $entry = preg_replace('/[\n\r]/is', ' ', $entry);
// HTML entities like &nbsp;
! $entry = preg_replace('/\b&[a-z]+;\b/', ' ', $entry);
// Remove URL's
! $entry = preg_replace('/\b[a-z0-9]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?/', ' ', $entry);
// Quickly remove BBcode.
! $entry = preg_replace('/\[img:[a-z0-9]{10,}\].*?\[\/img:[a-z0-9]{10,}\]/', ' ', $entry);
! $entry = preg_replace('/\[\/?url(=.*?)?\]/', ' ', $entry);
! $entry = preg_replace('/\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]/', ' ', $entry);
}
! else if( $mode == 'search' )
{
! $entry = str_replace('+', ' and ', $entry);
! $entry = str_replace('-', ' not ', $entry);
}
// Replace numbers on their own
! $entry = preg_replace('/\b[0-9]+\b/', ' ', $entry);
//
***************
*** 66,75 ****
}
! if( $mode == "post" )
{
! $entry = str_replace("*", " ", $entry);
// 'words' that consist of <=3 or >=25 characters are removed.
! $entry = preg_replace("/\b([a-z0-9]{1,3}|[a-z0-9]{25,})\b/", " ", $entry);
}
--- 66,75 ----
}
! if( $mode == 'post' )
{
! $entry = str_replace('*', ' ', $entry);
// 'words' that consist of <=3 or >=25 characters are removed.
! $entry = preg_replace('/\b([a-z0-9]{1,3}|[a-z0-9]{25,})\b/',' ', $entry);
}
***************
*** 80,86 ****
$stopword = trim($stopword_list[$j]);
! if ( $mode == "post" || ( $stopword != "not" && $stopword != "and" && $stopword != "or" ) )
{
! $entry = preg_replace("/\b" . $stopword . "\b/", " ", $entry);
}
}
--- 80,86 ----
$stopword = trim($stopword_list[$j]);
! if ( $mode == 'post' || ( $stopword != 'not' && $stopword != 'and' && $stopword != 'or' ) )
{
! $entry = preg_replace('#\b' . preg_quote($stopword) . '\b#', ' ', $entry);
}
}
***************
*** 91,98 ****
for ($j = 0; $j < count($synonym_list); $j++)
{
! list($replace_synonym, $match_synonym) = split(" ", trim(strtolower($synonym_list[$j])));
! if ( $mode == "post" || ( $match_synonym != "not" && $match_synonym != "and" && $match_synonym != "or" ) )
{
! $entry = preg_replace("/\b" . trim($match_synonym) . "\b/", " " . trim($replace_synonym) . " ", $entry);
}
}
--- 91,98 ----
for ($j = 0; $j < count($synonym_list); $j++)
{
! list($replace_synonym, $match_synonym) = split(' ', trim(strtolower($synonym_list[$j])));
! if ( $mode == 'post' || ( $match_synonym != 'not' && $match_synonym != 'and' && $match_synonym != 'or' ) )
{
! $entry = preg_replace('#\b' . trim($match_synonym) . '\b#', ' ' . trim($replace_synonym) . ' ', $entry);
}
}
***************
*** 102,108 ****
}
! function split_words(&$entry, $mode = "post")
{
! if( $mode == "post" )
{
preg_match_all("/\b(\w[\w']*\w+|\w+?)\b/", $entry, $split_entries);
--- 102,108 ----
}
! function split_words(&$entry, $mode = 'post')
{
! if( $mode == 'post' )
{
preg_match_all("/\b(\w[\w']*\w+|\w+?)\b/", $entry, $split_entries);
***************
*** 110,114 ****
else
{
! preg_match_all("/(\*?[a-z0-9]+\*?)|\b([a-z0-9]+)\b/", $entry, $split_entries);
}
--- 110,114 ----
else
{
! preg_match_all('/(\*?[a-z0-9]+\*?)|\b([a-z0-9]+)\b/', $entry, $split_entries);
}
***************
*** 116,125 ****
}
! function add_search_words($post_id, $post_text, $post_title = "")
{
global $db, $phpbb_root_path, $board_config, $lang;
! $stopwords_array = @file($phpbb_root_path . "language/lang_" . $board_config['default_lang'] . "/search_stopwords.txt");
! $synonym_array = @file($phpbb_root_path . "language/lang_" . $board_config['default_lang'] . "/search_synonyms.txt");
$search_raw_words = array();
--- 116,125 ----
}
! function add_search_words($post_id, $post_text, $post_title = '')
{
global $db, $phpbb_root_path, $board_config, $lang;
! $stopwords_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_stopwords.txt");
! $synonym_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_synonyms.txt");
$search_raw_words = array();
***************
*** 131,135 ****
while( list($word_in, $search_matches) = @each($search_raw_words) )
{
! $word_insert_sql[$word_in] = "";
if( !empty($search_matches) )
{
--- 131,135 ----
while( list($word_in, $search_matches) = @each($search_raw_words) )
{
! $word_insert_sql[$word_in] = '';
if( !empty($search_matches) )
{
***************
*** 138,142 ****
$search_matches[$i] = trim($search_matches[$i]);
! if( $search_matches[$i] != "" )
{
$word[] = $search_matches[$i];
--- 138,142 ----
$search_matches[$i] = trim($search_matches[$i]);
! if( $search_matches[$i] != '' )
{
$word[] = $search_matches[$i];
***************
*** 154,159 ****
sort($word);
! $prev_word = "";
! $word_text_sql = "";
$temp_word = array();
for($i = 0; $i < count($word); $i++)
--- 154,159 ----
sort($word);
! $prev_word = '';
! $word_text_sql = '';
$temp_word = array();
for($i = 0; $i < count($word); $i++)
***************
*** 162,166 ****
{
$temp_word[] = $word[$i];
! $word_text_sql .= ( ( $word_text_sql != "" ) ? ", " : "" ) . "'" . $word[$i] . "'";
}
$prev_word = $word[$i];
--- 162,166 ----
{
$temp_word[] = $word[$i];
! $word_text_sql .= ( ( $word_text_sql != '' ) ? ', ' : '' ) . "'" . $word[$i] . "'";
}
$prev_word = $word[$i];
***************
*** 181,185 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't select words", "", __LINE__, __FILE__, $sql);
}
--- 181,185 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not select words', '', __LINE__, __FILE__, $sql);
}
***************
*** 191,195 ****
}
! $value_sql = "";
$match_word = array();
for ($i = 0; $i < count($word); $i++)
--- 191,195 ----
}
! $value_sql = '';
$match_word = array();
for ($i = 0; $i < count($word); $i++)
***************
*** 207,214 ****
case 'mysql':
case 'mysql4':
! $value_sql .= ( ( $value_sql != "" ) ? ", " : "" ) . "('" . $word[$i] . "')";
break;
case 'mssql':
! $value_sql .= ( ( $value_sql != "" ) ? " UNION ALL " : "" ) . "SELECT '" . $word[$i] . "'";
break;
default:
--- 207,214 ----
case 'mysql':
case 'mysql4':
! $value_sql .= ( ( $value_sql != '' ) ? ', ' : '' ) . "('" . $word[$i] . "')";
break;
case 'mssql':
! $value_sql .= ( ( $value_sql != '' ) ? ' UNION ALL ' : '' ) . "SELECT '" . $word[$i] . "'";
break;
default:
***************
*** 217,221 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't insert new word", "", __LINE__, __FILE__, $sql);
}
break;
--- 217,221 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
}
break;
***************
*** 224,228 ****
}
! if ( $value_sql != "" )
{
switch ( SQL_LAYER )
--- 224,228 ----
}
! if ( $value_sql != '' )
{
switch ( SQL_LAYER )
***************
*** 241,245 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't insert new word", "", __LINE__, __FILE__, $sql);
}
}
--- 241,245 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
}
}
***************
*** 250,254 ****
$title_match = ( $word_in == 'title' ) ? 1 : 0;
! if ( $match_sql != "" )
{
$sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match)
--- 250,254 ----
$title_match = ( $word_in == 'title' ) ? 1 : 0;
! if ( $match_sql != '' )
{
$sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match)
***************
*** 258,262 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't insert new word matches", "", __LINE__, __FILE__, $sql);
}
}
--- 258,262 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not insert new word matches', '', __LINE__, __FILE__, $sql);
}
}
***************
*** 278,285 ****
global $db;
! $sql = ( $mode == "global" ) ? "SELECT COUNT(post_id) AS total_posts FROM " . SEARCH_MATCH_TABLE . " GROUP BY post_id" : "SELECT SUM(forum_posts) AS total_posts FROM " . FORUMS_TABLE;
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't obtain post count", "", __LINE__, __FILE__, $sql);
}
--- 278,285 ----
global $db;
! $sql = ( $mode == 'global' ) ? "SELECT COUNT(post_id) AS total_posts FROM " . SEARCH_MATCH_TABLE . " GROUP BY post_id" : "SELECT SUM(forum_posts) AS total_posts FROM " . FORUMS_TABLE;
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not obtain post count', '', __LINE__, __FILE__, $sql);
}
***************
*** 290,299 ****
$common_threshold = floor($row['total_posts'] * $fraction);
! if( $mode == "single" && count($word_id_list) )
{
! $word_id_sql = "";
for($i = 0; $i < count($word_id_list); $i++)
{
! $word_id_sql .= ( ( $word_id_sql != "" ) ? ", " : "" ) . "'" . $word_id_list[$i] . "'";
}
--- 290,299 ----
$common_threshold = floor($row['total_posts'] * $fraction);
! if( $mode == 'single' && count($word_id_list) )
{
! $word_id_sql = '';
for($i = 0; $i < count($word_id_list); $i++)
{
! $word_id_sql .= ( ( $word_id_sql != '' ) ? ', ' : '' ) . "'" . $word_id_list[$i] . "'";
}
***************
*** 315,330 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't obtain common word list", "", __LINE__, __FILE__, $sql);
}
! $common_word_id = "";
while( $row = $db->sql_fetchrow($result) )
{
! $common_word_id .= ( $common_word_id != "" ) ? ", " . $row['word_id'] : $row['word_id'];
}
$db->sql_freeresult($result);
! if( $common_word_id != "" )
{
$sql = "UPDATE " . SEARCH_WORD_TABLE . "
--- 315,330 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not obtain common word list', '', __LINE__, __FILE__, $sql);
}
! $common_word_id = '';
while( $row = $db->sql_fetchrow($result) )
{
! $common_word_id .= ( ( $common_word_id != '' ) ? ', ' : '' ) . $row['word_id'];
}
$db->sql_freeresult($result);
! if( $common_word_id != '' )
{
$sql = "UPDATE " . SEARCH_WORD_TABLE . "
***************
*** 333,337 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't delete word list entry", "", __LINE__, __FILE__, $sql);
}
--- 333,337 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not delete word list entry', '', __LINE__, __FILE__, $sql);
}
***************
*** 340,344 ****
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't delete word match entry", "", __LINE__, __FILE__, $sql);
}
}
--- 340,344 ----
if( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not delete word match entry', '', __LINE__, __FILE__, $sql);
}
}
***************
*** 443,447 ****
if ( !empty($search_match) )
{
! $username_search = preg_replace("/\*/", "%", trim(strip_tags($search_match)));
$sql = "SELECT username
--- 443,447 ----
if ( !empty($search_match) )
{
! $username_search = preg_replace('/\*/', '%', trim(strip_tags($search_match)));
$sql = "SELECT username
***************
*** 451,455 ****
if ( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, "Couldn't obtain search results", "", __LINE__, __FILE__, $sql);
}
--- 451,455 ----
if ( !($result = $db->sql_query($sql)) )
{
! message_die(GENERAL_ERROR, 'Could not obtain search results', '', __LINE__, __FILE__, $sql);
}
***************
*** 476,493 ****
$template->set_filenames(array(
! "search_user_body" => "search_username.tpl")
);
$template->assign_vars(array(
! "L_CLOSE_WINDOW" => $lang['Close_window'],
! "L_SEARCH_USERNAME" => $lang['Find_username'],
! "L_UPDATE_USERNAME" => $lang['Select_username'],
! "L_SELECT" => $lang['Select'],
! "L_SEARCH" => $lang['Search'],
! "L_SEARCH_EXPLAIN" => $lang['Search_author_explain'],
! "L_CLOSE_WINDOW" => $lang['Close_window'],
! "S_AUTHOR_OPTIONS" => $author_list,
! "S_SEARCH_ACTION" => append_sid("search.$phpEx?mode=searchuser"))
);
--- 476,493 ----
$template->set_filenames(array(
! 'search_user_body' => 'search_username.tpl')
);
$template->assign_vars(array(
! 'L_CLOSE_WINDOW' => $lang['Close_window'],
! 'L_SEARCH_USERNAME' => $lang['Find_username'],
! 'L_UPDATE_USERNAME' => $lang['Select_username'],
! 'L_SELECT' => $lang['Select'],
! 'L_SEARCH' => $lang['Search'],
! 'L_SEARCH_EXPLAIN' => $lang['Search_author_explain'],
! 'L_CLOSE_WINDOW' => $lang['Close_window'],
! 'S_AUTHOR_OPTIONS' => $author_list,
! 'S_SEARCH_ACTION' => append_sid("search.$phpEx?mode=searchuser"))
);
***************
*** 498,505 ****
if ( !empty($author_list) )
{
! $template->assign_block_vars("switch_select_name", array());
}
! $template->pparse("search_user_body");
include($phpbb_root_path . 'includes/page_tail.'.$phpEx);
--- 498,505 ----
if ( !empty($author_list) )
{
! $template->assign_block_vars('switch_select_name', array());
}
! $template->pparse('search_user_body');
include($phpbb_root_path . 'includes/page_tail.'.$phpEx);
|