From: Geoffrey T. D. <da...@us...> - 2001-03-14 19:46:28
|
Update of /cvsroot/phpwiki/phpwiki/lib In directory usw-pr-cvs1:/tmp/cvs-serv5332/lib Modified Files: config.php transform.php Log Message: Attempt to fix the recognition of international characters within WikiWords. Older (pre 3.x) PCRE libraries, which are used by some PHPs, do not support POSIX named character classes (e.g. [:alnum:]). I've added a new function, pcre_fix_posix_classes(), which (only if necessary) will replace POSIX named classes in a regexp with an enumerated equivalent. Index: config.php =================================================================== RCS file: /cvsroot/phpwiki/phpwiki/lib/config.php,v retrieving revision 1.36 retrieving revision 1.37 diff -C2 -r1.36 -r1.37 *** config.php 2001/03/11 17:56:36 1.36 --- config.php 2001/03/14 19:48:29 1.37 *************** *** 118,121 **** --- 118,167 ---- setlocale('LC_CTYPE', 'en_US.iso-8859-1'); + /** string pcre_fix_posix_classes (string $regexp) + * + * Older versions (pre 3.x?) of the PCRE library do not support + * POSIX named character classes (e.g. [[:alnum:]]). + * + * This is a helper function which can be used to convert a regexp + * which contains POSIX named character classes to one that doesn't. + * + * All instances of strings like '[:<class>:]' are replaced by the equivalent + * enumerated character class. + * + * Implementation Notes: + * + * Currently we use hard-coded values which are valid only for + * ISO-8859-1. Also, currently only the classes [:alpha:], [:alnum:], + * [:upper:] and [:lower:] are implemented. (The missing classes: + * [:blank:], [:cntrl:], [:digit:], [:graph:], [:print:], [:punct:], + * [:space:], and [:xdigit:] could easily be added if needed.) + * + * This is a hack. I tried to generate these classes automatically + * using ereg(), but discovered that in my PHP, at least, ereg() is + * slightly broken w.r.t. POSIX character classes. (It includes + * "\xaa" and "\xba" in [:alpha:].) + * + * So for now, this will do. 
--Jeff <da...@da...> 14 Mar, 2001 + */ + function pcre_fix_posix_classes ($regexp) { + // First check to see if our PCRE lib supports POSIX character + // classes. If it does, there's nothing to do. + if (preg_match('/[[:upper:]]/', 'A')) + return $regexp; + + static $classes = array( + 'alnum' => "0-9A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff", + 'alpha' => "A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\xff", + 'upper' => "A-Z\xc0-\xd6\xd8-\xde", + 'lower' => "a-z\xdf-\xf6\xf8-\xff" + ); + + $keys = join('|', array_keys($classes)); + + return preg_replace("/\[:($keys):]/e", '$classes["\1"]', $regexp); + } + + $WikiNameRegexp = pcre_fix_posix_classes($WikiNameRegexp); + ////////////////////////////////////////////////////////////////// // Autodetect URL settings: Index: transform.php =================================================================== RCS file: /cvsroot/phpwiki/phpwiki/lib/transform.php,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -r1.20 -r1.21 *** transform.php 2001/03/07 16:45:20 1.20 --- transform.php 2001/03/14 19:48:29 1.21 *************** *** 284,288 **** if (function_exists('wtt_interwikilinks')) { $transform->register(WT_TOKENIZER, 'wtt_interwikilinks', ! "!?(?<![[:alnum:]])$InterWikiLinkRegexp:$WikiNameRegexp"); } $transform->register(WT_TOKENIZER, 'wtt_bumpylinks', "!?$WikiNameRegexp"); --- 284,289 ---- if (function_exists('wtt_interwikilinks')) { $transform->register(WT_TOKENIZER, 'wtt_interwikilinks', ! pcre_fix_posix_classes("!?(?<![[:alnum:]])") . ! "$InterWikiLinkRegexp:$WikiNameRegexp"); } $transform->register(WT_TOKENIZER, 'wtt_bumpylinks', "!?$WikiNameRegexp"); |