From: <var...@us...> - 2009-03-03 20:23:07
|
Revision: 6624 http://phpwiki.svn.sourceforge.net/phpwiki/?rev=6624&view=rev Author: vargenau Date: 2009-03-03 20:23:04 +0000 (Tue, 03 Mar 2009) Log Message: ----------- pcre_fix_posix_classes no longer useful Modified Paths: -------------- trunk/lib/config.php Modified: trunk/lib/config.php =================================================================== --- trunk/lib/config.php 2009-03-03 16:05:01 UTC (rev 6623) +++ trunk/lib/config.php 2009-03-03 20:23:04 UTC (rev 6624) @@ -324,74 +324,8 @@ return $loc; } -/** string pcre_fix_posix_classes (string $regexp) -* -* Older version (pre 3.x?) of the PCRE library do not support -* POSIX named character classes (e.g. [[:alnum:]]). -* -* This is a helper function which can be used to convert a regexp -* which contains POSIX named character classes to one that doesn't. -* -* All instances of strings like '[:<class>:]' are replaced by the equivalent -* enumerated character class. -* -* Implementation Notes: -* -* Currently we use hard-coded values which are valid only for -* ISO-8859-1. Also, currently on the classes [:alpha:], [:alnum:], -* [:upper:] and [:lower:] are implemented. (The missing classes: -* [:blank:], [:cntrl:], [:digit:], [:graph:], [:print:], [:punct:], -* [:space:], and [:xdigit:] could easily be added if needed.) -* -* This is a hack. I tried to generate these classes automatically -* using ereg(), but discovered that in my PHP, at least, ereg() is -* slightly broken w.r.t. POSIX character classes. (It includes -* "\xaa" and "\xba" in [:alpha:].) -* -* So for now, this will do. --Jeff <da...@da...> 14 Mar, 2001 -*/ function pcre_fix_posix_classes ($regexp) { - global $charset; - if (!isset($charset)) - $charset = CHARSET; // get rid of constant. pref is dynamic and language specific - if (in_array($GLOBALS['LANG'], array('zh'))) - $charset = 'utf-8'; - if (strstr($GLOBALS['LANG'],'.utf-8')) - $charset = 'utf-8'; - elseif (strstr($GLOBALS['LANG'],'.euc-jp')) - $charset = 'euc-jp'; - elseif (in_array($GLOBALS['LANG'], array('ja'))) - //$charset = 'utf-8'; - $charset = 'euc-jp'; - - if (strtolower($charset) == 'utf-8') { // thanks to John McPherson - // until posix class names/pcre work with utf-8 - if (preg_match('/[[:upper:]]/', '\xc4\x80')) - return $regexp; - // utf-8 non-ascii chars: most common (eg western) latin chars are 0xc380-0xc3bf - // we currently ignore other less common non-ascii characters - // (eg central/east european) latin chars are 0xc432-0xcdbf and 0xc580-0xc5be - // and indian/cyrillic/asian languages - - // this replaces [[:lower:]] with utf-8 match (Latin only) - $regexp = preg_replace('/\[\[\:lower\:\]\]/','(?:[a-z]|\xc3[\x9f-\xbf]|\xc4[\x81\x83\x85\x87])', - $regexp); - // this replaces [[:upper:]] with utf-8 match (Latin only) - $regexp = preg_replace('/\[\[\:upper\:\]\]/','(?:[A-Z]|\xc3[\x80-\x9e]|\xc4[\x80\x82\x84\x86])', - $regexp); - } elseif (preg_match('/[[:upper:]]/', '\xC4')) { - // First check to see if our PCRE lib supports POSIX character - // classes. If it does, there's nothing to do. - return $regexp; - } - static $classes = array( - 'alnum' => "0-9A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff", - 'alpha' => "A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff", - 'upper' => "A-Z\xc0-\xd6\xd8-\xde", - 'lower' => "a-z\xdf-\xf6\xf8-\xff" - ); - $keys = join('|', array_keys($classes)); - return preg_replace("/\[:($keys):]/e", '$classes["\1"]', $regexp); + return $regexp; } function deduce_script_name() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |