|
From: <var...@us...> - 2009-03-03 20:23:07
|
Revision: 6624
http://phpwiki.svn.sourceforge.net/phpwiki/?rev=6624&view=rev
Author: vargenau
Date: 2009-03-03 20:23:04 +0000 (Tue, 03 Mar 2009)
Log Message:
-----------
pcre_fix_posix_classes no longer useful
Modified Paths:
--------------
trunk/lib/config.php
Modified: trunk/lib/config.php
===================================================================
--- trunk/lib/config.php 2009-03-03 16:05:01 UTC (rev 6623)
+++ trunk/lib/config.php 2009-03-03 20:23:04 UTC (rev 6624)
@@ -324,74 +324,8 @@
return $loc;
}
-/** string pcre_fix_posix_classes (string $regexp)
-*
-* Older versions (pre 3.x?) of the PCRE library do not support
-* POSIX named character classes (e.g. [[:alnum:]]).
-*
-* This is a helper function which can be used to convert a regexp
-* which contains POSIX named character classes to one that doesn't.
-*
-* All instances of strings like '[:<class>:]' are replaced by the equivalent
-* enumerated character class.
-*
-* Implementation Notes:
-*
-* Currently we use hard-coded values which are valid only for
-* ISO-8859-1. Also, currently only the classes [:alpha:], [:alnum:],
-* [:upper:] and [:lower:] are implemented. (The missing classes:
-* [:blank:], [:cntrl:], [:digit:], [:graph:], [:print:], [:punct:],
-* [:space:], and [:xdigit:] could easily be added if needed.)
-*
-* This is a hack. I tried to generate these classes automatically
-* using ereg(), but discovered that in my PHP, at least, ereg() is
-* slightly broken w.r.t. POSIX character classes. (It includes
-* "\xaa" and "\xba" in [:alpha:].)
-*
-* So for now, this will do. --Jeff <da...@da...> 14 Mar, 2001
-*/
function pcre_fix_posix_classes ($regexp) {
- global $charset;
- if (!isset($charset))
- $charset = CHARSET; // get rid of constant. pref is dynamic and language specific
- if (in_array($GLOBALS['LANG'], array('zh')))
- $charset = 'utf-8';
- if (strstr($GLOBALS['LANG'],'.utf-8'))
- $charset = 'utf-8';
- elseif (strstr($GLOBALS['LANG'],'.euc-jp'))
- $charset = 'euc-jp';
- elseif (in_array($GLOBALS['LANG'], array('ja')))
- //$charset = 'utf-8';
- $charset = 'euc-jp';
-
- if (strtolower($charset) == 'utf-8') { // thanks to John McPherson
- // until posix class names/pcre work with utf-8
- if (preg_match('/[[:upper:]]/', '\xc4\x80'))
- return $regexp;
- // utf-8 non-ascii chars: most common (eg western) latin chars are 0xc380-0xc3bf
- // we currently ignore other less common non-ascii characters
- // (eg central/east european) latin chars are 0xc432-0xcdbf and 0xc580-0xc5be
- // and indian/cyrillic/asian languages
-
- // this replaces [[:lower:]] with utf-8 match (Latin only)
- $regexp = preg_replace('/\[\[\:lower\:\]\]/','(?:[a-z]|\xc3[\x9f-\xbf]|\xc4[\x81\x83\x85\x87])',
- $regexp);
- // this replaces [[:upper:]] with utf-8 match (Latin only)
- $regexp = preg_replace('/\[\[\:upper\:\]\]/','(?:[A-Z]|\xc3[\x80-\x9e]|\xc4[\x80\x82\x84\x86])',
- $regexp);
- } elseif (preg_match('/[[:upper:]]/', '\xC4')) {
- // First check to see if our PCRE lib supports POSIX character
- // classes. If it does, there's nothing to do.
- return $regexp;
- }
- static $classes = array(
- 'alnum' => "0-9A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff",
- 'alpha' => "A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff",
- 'upper' => "A-Z\xc0-\xd6\xd8-\xde",
- 'lower' => "a-z\xdf-\xf6\xf8-\xff"
- );
- $keys = join('|', array_keys($classes));
- return preg_replace("/\[:($keys):]/e", '$classes["\1"]', $regexp);
+ return $regexp;
}
function deduce_script_name() {
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|