From: <be...@us...> - 2008-05-18 15:33:09
|
Revision: 1167 http://geshi.svn.sourceforge.net/geshi/?rev=1167&view=rev Author: benbe Date: 2008-05-18 08:33:07 -0700 (Sun, 18 May 2008) Log Message: ----------- add: Building up GeSHi 1.1.X developement branch using old releases Modified Paths: -------------- branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/class.geshiccodeparser.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/common.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/codeworker/codeworker.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/css/css.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/delphi/common.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/delphi/delphi.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/java/java.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/javascript/javascript.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/php/class.geshiphpcodeparser.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/php/common.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/sql/sql.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/vhdl/vhdl.php branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/themes/default/c/c.php Added Paths: ----------- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php 2008-05-18 15:24:09 UTC (rev 1166) +++ 
branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php 2008-05-18 15:33:07 UTC (rev 1167) @@ -42,7 +42,7 @@ $geshi_old_reporting_level = error_reporting(E_ALL); /** GeSHi Version */ -define('GESHI_VERSION', '1.1.2alpha2'); +define('GESHI_VERSION', '1.1.2alpha3'); /** Set the correct directory separator */ define('GESHI_DIR_SEP', ('WIN' != substr(PHP_OS, 0, 3)) ? '/' : '\\'); @@ -318,7 +318,7 @@ * language. If caching of the root context is enabled, then this time will likely * be close to zero if you are calling this method after second and subsequent calls * to {@link GeSHi::parseCode()}.</li> - * <li>If youpass <b>'parse'</b>, you will get the time it took to parse the last + * <li>If you pass <b>'parse'</b>, you will get the time it took to parse the last * time {@link GeSHi::parseCode()} was called. * </ul> * @@ -382,6 +382,43 @@ } // }}} + // {{{ getSupportedLanguages() + + /** + * @todo document this function + * @todo This and other methods share a lot of directory traversal + * functionality, which could be split out somehow. + * @todo actually, this should be implemented using a registry + */ + function getSupportedLanguages ($return_human = false) + { + $languages = array(); + + $ignore = array('.', '..', 'CVS'); + $dh = opendir(GESHI_LANGUAGES_ROOT); + while (false !== ($dir = readdir($dh))) { + if (in_array($dir, $ignore) || is_file(GESHI_LANGUAGES_ROOT . $dir)) continue; + // Check the directory for the dialect files + $ldh = opendir(GESHI_LANGUAGES_ROOT . $dir); + while (false !== ($file = readdir($ldh))) { + if (in_array($file, $ignore) || is_dir(GESHI_LANGUAGES_ROOT . 
"$dir/$file") || substr($file, -4) != '.php') continue; + + // Found a language file + $file = substr($file, 0, -4); + if ('common' == $file || 'class' == substr($file, 0, 5)) continue; + + if ($return_human) { + $languages["$dir/$file"] = GeSHi::getHumanLanguageName("$dir/$file"); + } else { + $languages[] = "$dir/$file"; + } + } + } + + return $languages; + } + + // }}} // {{{ getSupportedThemes() /** @@ -504,6 +541,26 @@ } // }}} + // {{{ getHumanLanguageName() + + /** + * Given a language name, return a human version of it + * + * @param string $language The language name to get the human version of + * @return string The human language name, or <kbd>false</kbd> if the + * language does not exist + * @static + * @todo actually implement this function + * @since 1.1.2 + */ + function getHumanLanguageName ($language) + { + $human_name = ''; + $language = GeSHi::_clean($language); + return $language; + } + + // }}} // {{{ getHumanThemeName() /** Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php 2008-05-18 15:24:09 UTC (rev 1166) +++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php 2008-05-18 15:33:07 UTC (rev 1167) @@ -786,8 +786,6 @@ $length = $data['len']; if (isset($data['tab'])) { geshi_dbg('Table: ' . 
print_r($data['tab'], true)); - $this->_startRegexTable = $data['tab']; - $delimiter = $data['tab'][0]; } if (false !== $position) { @@ -798,6 +796,10 @@ $first_position = $position; $first_length = $length; $first_key = $key; + if (isset($data['tab'])) { + $this->_startRegexTable = $data['tab']; + $delimiter = $data['tab'][0]; + } $first_dlm = $delimiter; } } else { Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php 2008-05-18 15:24:09 UTC (rev 1166) +++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php 2008-05-18 15:33:07 UTC (rev 1167) @@ -6,10 +6,10 @@ * Author: Nigel McNie * E-mail: ni...@ge... * </pre> - * + * * For information on how to use GeSHi, please consult the documentation * found in the docs/ directory, or online at http://geshi.org/docs/ - * + * * This program is part of GeSHi. * * This program is free software; you can redistribute it and/or modify @@ -21,160 +21,236 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * @package geshi * @subpackage core - * @author Nigel McNie <ni...@ge...> + * @author Nigel McNie <ni...@ge...>; + * http://clc-wiki.net/wiki/User:Netocrat * @license http://www.gnu.org/copyleft/gpl.html GNU GPL * @copyright (C) 2004 - 2006 Nigel McNie * @version $Id$ - * + * */ /** * The GeSHiSingleCharContext class. This class extends GeSHiContext to handle * "single character" strings - strings that are only one character long, like * in java. 
- * - * Note that this functionality assumes that the delimiters for single character - * contexts are just one character long (a sensible assumption made for speed - * reasons). If required in the future this class could support longer delimiters. - * + * + * Escape sequences need not be limited to one character and may be REGEX- + * specified, to allow for situations such as C's octal and hexadecimal escapes, + * e.g. '\xFF'. Likewise for the start and end delimiter, and the escape + * "character". This is handy for situations such as C's widestring + * characters, which are prefixed by an L. + * * @package geshi * @subpackage core - * @author Nigel McNie <ni...@ge...> + * @author Nigel McNie <ni...@ge...>; Netocrat * @since 1.1.1 * @version $Revision$ * @see GeSHiContext */ class GeSHiSingleCharContext extends GeSHiContext { - + // {{{ properties - + /**#@- * @access private */ + /** The parsed data when getContextStartData() is successful. */ + var $_characterLen; + var $_endDelimiterLen; + var $_isEscapeSeq; + + var $_disallowEmpty; + + /** Characters that start an escape sequence... */ var $_escapeCharacters; - - // Characters that should be escaped + /** ...and the valid escape sequences that can follow. */ var $_charsToEscape; - + /**#@-*/ - + // }}} // {{{ setEscapeCharacters() - + /** + * Specifies each "character" that should be interpreted as the start of an + * escape sequence when it occurs immediately subsequent to a start + * delimiter. Each "character" may be greater than one actual character in + * length, and may optionally be specified by a REGEX-string - look-behind + * assertions on such regexes are not supported. + * @param Mixed Array of strings or single string. + */ function setEscapeCharacters ($chars) { $this->_escapeCharacters = (array) $chars; } - + // }}} // {{{ setCharactersToEscape() - + /** + * Specifies all escape sequences that are valid following any of the + * escape characters. 
Each escape sequence may be greater than one + * character in length and may be specified by a REGEX-string - look-behind + * assertions on such regexes are not supported. + * @param Mixed Array of strings or single string. + */ function setCharactersToEscape ($chars) { - $this->_charsToEscape = (array) $chars; + static $re_starter_c = 'REGEX'; + static $re_starter_len_c = 5/*strlen($re_starter_c)*/; + $this->_charsToEscape = array(); + /* Save a little time and processing by anchoring all regexes now, + * rather than each time geshi_whichsubstr() is called. + */ + foreach ((array)$chars as $escSeq) { + if (strncmp($escSeq, $re_starter_c, $re_starter_len_c) == 0) { + $re = substr($escSeq, $re_starter_len_c); + $re = geshi_anchor_re($re); + $this->_charsToEscape[] = $re_starter_c.$re; + } else $this->_charsToEscape[] = $escSeq; + } } - + // }}} + // {{{ setDisallowEmpty() + /** + * Call this to specify whether to disallow empty characters - e.g. in C, '' + * is invalid. By default empty characters are allowed. The default + * parameter value of this function is true so e.g. in C's case this can be + * called simply as $context->setDisallowEmptyChars(). + * @param boolean $value Defaults to true. + */ + function setDisallowEmptyChars ($value = true) + { + $this->_disallowEmpty = $value; + } + + // }}} // {{{ getContextStartData() /** * GetContextStartData - * - * Overridden to check if this context should even start. If we can't find - * a valid end-of-string character in the correct place this context should - * not even start. - * + * + * Overrides the parent method to check whether this context should even + * start. Checks for a complete character including start and end + * delimiters and valid contained character, which might be an escape + * sequence. Stores all data found so that it may be used by + * _getContextEndData() and _addParseData(), to avoid reparsing. 
+ * * @param string $code * @param string $start_of_context */ function getContextStartData ($code, $start_of_context) { - geshi_dbg('GeSHiSingleCharContext::getContextStartData(' . $this->_contextName . ', ' . $start_of_context . ')'); - + geshi_dbg('GeSHiSingleCharContext::getContextStartData(' . + $this->_contextName . ', ' . $start_of_context . ')'); + $offset = 0; + $data = null; while (true) { + /* For retries, strip to just past the last failed start. */ + if ($data != null) { + $code = substr($code, $data['pos'] + 1); + $offset += $data['pos'] + 1; + } + $data = parent::getContextStartData($code, $start_of_context); - - // First, if no match then bail - if (-1 === $data['pos']) { - return $data; - } - - $first_position = $data['pos']; - $first_length = $data['len']; - $first_key = $data['key']; - $first_dlm = $data['dlm']; - - // Check for empty character - // WARN: claim here that delimiters are only one char long! - if (in_array(substr($code, $first_position + 1, 1), $this->_contextDelimiters[$first_key][1])) { - // Nothing wrong with this + + /* First, if no match then bail */ + if (-1 === $data['pos']) break; + + /* Check for empty character */ + $end_delim = geshi_whichsubstr($code, $this->_contextDelimiters[ + $data['key']][1], $data['pos'] + $data['len'], + GESHI_WHICHSS_MAXIMAL|GESHI_WHICHSS_TRYREGEX); + if ($end_delim !== null) { + if (!$this->_disallowEmpty) { + $data['pos'] += $offset; + $this->_characterLen = 0; + $this->_endDelimiterLen = strlen($end_delim); + $this->_isEscapeSeq = false; + break; + } else { + /* Support a (hypothetical) syntax where empty characters + * are not permitted but where the end delimiter doubles as + * an escape character. 
*/ + $empty = true; + } + } else $empty = false; + + /* Check for the start of an escape sequence */ + $esc_start = geshi_whichsubstr($code, $this->_escapeCharacters, + $data['pos'] + $data['len'], GESHI_WHICHSS_MAXIMAL| + GESHI_WHICHSS_TRYREGEX); + $esc_len = strlen($esc_start); + if ($esc_start !== null) { + /* Check for a valid full escape sequence; allow regexes + * that match sequences of length > 1. Match the most + * inclusive char/regex. */ + $start = $data['pos'] + $data['len'] + $esc_len; + $esc_seq = geshi_whichsubstr($code, $this->_charsToEscape, + $start, GESHI_WHICHSS_MAXIMAL|GESHI_WHICHSS_TRYREGEX| + GESHI_WHICHSS_SKIPANCHORINSERT); + if ($esc_seq === null) continue; + else $char_len = $esc_len + strlen($esc_seq); + } else if ($empty) continue; + else $char_len = 1; /* Possible single unescaped character */ + + $final_char_offset = $data['len'] + $char_len; + + /* Check for an end delimiter and if found, return successfully */ + $end_delim = geshi_whichsubstr($code, $this->_contextDelimiters[ + $data['key']][1], $data['pos'] + $final_char_offset, + GESHI_WHICHSS_MAXIMAL|GESHI_WHICHSS_TRYREGEX); + if ($end_delim !== null) { $data['pos'] += $offset; - return $data; + $this->_characterLen = $char_len; + $this->_endDelimiterLen = strlen($end_delim); + $this->_isEscapeSeq = ($esc_start !== null); + break; } - - // Check for single alone character - $final_char_offset = (in_array(substr($code, $first_position + 1, 1), $this->_escapeCharacters)) - ? 
3 : 2; - if (in_array(substr($code, $first_position + $final_char_offset, 1), - $this->_contextDelimiters[$first_key][1])) { - $data['pos'] += $offset; - return $data; - } - - - // End: strip to just past where the character failed to start and try again - $code = substr($code, $first_position + 1); - $offset += $first_position + 1; } - - return array('pos' => $first_position, 'len' => $first_length, - 'key' => $first_key, 'dlm' => $first_dlm); + return $data; } - + // }}} // {{{ _getContextEndData() - + /** - * In this case we don't need to worry about much because we have made sure in - * _getContextStartData that we are starting in the right place. + * In this case we don't need to worry about much because we have made sure + * in _getContextStartData that we are starting in the right place. */ - function _getContextEndData ($code, $context_open_key, $context_opener, $beginning_of_context) + function _getContextEndData ($code, $context_open_key, $context_opener, + $beginning_of_context) { - $pos = 1; - $first_char = substr($code, 0, 1); - if (in_array($first_char, $this->_escapeCharacters)) { - $pos = 2; - } elseif (in_array($first_char, $this->_contextDelimiters[$context_open_key][1])) { - $pos = 0; - } - return array('pos' => $pos, 'len' => 1 /*see WARN above*/, 'dlm' => ''); + return array('pos' => $this->_characterLen, + 'len' => $this->_endDelimiterLen, + 'dlm' => ''); } - + // }}} // {{{ _addParseData() - + /** * Overrides _addParseData to add escape characters also */ function _addParseData ($code, $first_char_of_next_context = '') { - geshi_dbg('GeSHiSingleCharContext::_addParseData(' . substr($code, 0, 15) . '...)'); - if (in_array(substr($code, 0, 1), $this->_escapeCharacters)) { + geshi_dbg('GeSHiSingleCharContext::_addParseData(' . + substr($code, 0, 15) . '...)'); + if ($this->_isEscapeSeq) { $this->_styler->addParseData($code, $this->_contextName . 
'/esc', $this->_getExtraParseData(), $this->_complexFlag); } else { parent::_addParseData($code, $first_char_of_next_context); } } - + // }}} } Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php 2008-05-18 15:24:09 UTC (rev 1166) +++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php 2008-05-18 15:33:07 UTC (rev 1167) @@ -54,115 +54,237 @@ /**#@- * @access private */ - var $_escapeCharacters = array(); - // Characters that should be escaped - var $_charsToEscape = array(); + /** + * Escape character groups. + * + * @var array + */ + var $_escapeGroups = array(); /**#@-*/ // }}} - // {{{ setEscapeCharacters() + // {{{ addEscapeGroup() /** - * Sets the characters that are used to escape other characters in a string + * Adds an escape group to this string context. + * + * An escape group consists of a group of characters that are escape + * characters, and another group of characters or regexes that are + * the characters to escape. You can also specify a context name for + * the escaped characters. + * + * The escape characters MUST be one character in length, and are + * automatically assumed to escape themselves. 
+ * + * @param mixed $escape_characters The characters that escape others + * @param mixed $characters_to_escape The characters/regexes that are + * escaped + * @param string $context_name A name for the escaped characters */ - function setEscapeCharacters ($chars) + function addEscapeGroup ($escape_characters, + $characters_to_escape = array(), $context_name = 'esc') { - $this->_escapeCharacters = (array) $chars; + // Sanity checking + $escape_characters = (array) $escape_characters; + $characters_to_escape = (array) $characters_to_escape; + foreach ($escape_characters as $char) { + if (strlen($char) != 1) { + trigger_error('GeSHiStringContext::addEscapeGroup(): malformed' + . ' language file: cannot have escape characters that are' + . ' longer than one character in length'); + } + if (!in_array($char, $characters_to_escape)) { + $characters_to_escape[] = $char; + } + } + + $this->_escapeGroups[] = array( + $escape_characters, + $characters_to_escape, + $context_name + ); } // }}} - // {{{ setCharactersToEscape() + // {{{ _getContextEndData() - function setCharactersToEscape ($chars) - { - $this->_charsToEscape = (array) $chars; - } - - // }}} - /** - * GetContextEndData + * Finds the end of a string context, taking the escape characters into + * account. + * + * @param string $code The code to look for the end of the + * context in + * @param int $context_open_key The key in the array of delimiters + * which corresponds to the opener + * @param string $context_opener The actual opener for the string */ function _getContextEndData ($code, $context_open_key, $context_opener) { - geshi_dbg('GeSHiStringContext::_getContextEndData(' . $this->_contextName . ', ' . $context_open_key . ', ' . $context_opener . ')'); + geshi_dbg('GeSHiStringContext::_getContextEndData(' + . $this->_contextName . ')'); $this->_lastOpener = $context_opener; $ender_data = array(); foreach ($this->_contextDelimiters[$context_open_key][1] as $ender) { - geshi_dbg(' Checking ender: ' . 
$ender); - // Prepare ender regexes if needed $ender = $this->_substitutePlaceholders($ender); - geshi_dbg(' ender after substitution: ' . $ender); + geshi_dbg(' Checking ender: ' . str_replace("\n", '\n', $ender)); - $pos = 0; + $tmp_str = $code; + $current_pos = 0; + while (true) { - $pos = geshi_get_position($code, $ender, $pos); - if (false === $pos) { + geshi_dbg("@btop of loop; current_pos = $current_pos; str=" + . substr($tmp_str, 0, 10)); + $pos_data = geshi_get_position($tmp_str, $ender); + if (false === $pos_data['pos']) { + geshi_dbg("could not find ender $ender in string " + . substr($tmp_str, 0, 10)); break; } - $len = $pos['len']; - $pos = $pos['pos']; - - $possible_string = substr($code, 0, $pos); - geshi_dbg(' String might be: ' . $possible_string); - - $not_escaped = true; - if ($this->_escapeCharacters) { - foreach ($this->_escapeCharacters as $escape_char) { - // remove escaped escape characters - $possible_string = str_replace($escape_char . $escape_char, '', $possible_string); - } - - geshi_dbg(' String with double escapes removed: ' . $possible_string); + geshi_dbg("found ender $ender at position " . $pos_data['pos']); - foreach ($this->_escapeCharacters as $escape_char) { - if (substr($possible_string, -1) == $escape_char) { - $not_escaped = false; - break; + // While we may have found an ender, it might be escaped. + // Finding out for sure whether it is escaped is harder than + // it may initially seem - we have to check each previous + // character to see if it escapes the one after it, and flip + // a flag which detects whether the initial character is + // escaped, or whether the character before the initial + // character is escaped (and thus the ender we found is the + // real thing). + $i = $pos_data['pos'] - 1; + if ($i >= 0) { + $current_char = substr($tmp_str, $i, 1); + $after_char = substr($tmp_str, $i + 1, 1); + geshi_dbg("checking char $current_char to see if it" + . 
" escapes the char $after_char"); + if ($this->_charEscapesChar($current_char, $after_char)) { + geshi_dbg(" it does! Might not have found the ender"); + $found_ender = true; + geshi_dbg('checking whether ' . substr($tmp_str, $i, 1) + . ' escapes ' . substr($tmp_str, $i + 1, 1)); + while (($i == 0 && $this->_isEscapeChar(substr($tmp_str, $i, 1))) || + $i > 0 + && $this->_charEscapesChar(substr($tmp_str, $i, 1), + substr($tmp_str, $i + 1, 1))) { + $found_ender = !$found_ender; + if (0 == $i) { + geshi_dbg('reached start of string and char is escape'); + } else { + geshi_dbg(substr($tmp_str, $i, 1) . ' escapes ' + . substr($tmp_str, $i + 1, 1) . ': found_ender=' + . $found_ender); + } + --$i; } - - if ($escape_char == $ender - && substr($code, $pos + 1, 1) == $escape_char) { - // We have encountered the case where a string - // has its own ender as a delimiter and as an - // escape character - $not_escaped = false; - break; + geshi_dbg('finished: found_ender=' . $found_ender); + if (!$found_ender) { + geshi_dbg('we did NOT find ender, it was escaped'); + $current_pos += $pos_data['pos'] + 1; + $tmp_str = substr($tmp_str, $pos_data['pos'] + 1); + continue; } + geshi_dbg('Found ender since the last char is escaped'); } + else { + geshi_dbg(" does not seem to escape the next char"); + } } - - if ($not_escaped) { - // We may have found the correct ender. If we haven't, then this string - // never ends and we will set the end position to the length of the code - // substr($code, $pos, 1) == $ender - $endpos = geshi_get_position($code, $ender, $pos); - geshi_dbg(' position of ender: ' . $endpos['pos']); - $pos = (false !== $pos && $endpos['pos'] === $pos) ? 
$pos : strlen($code); - if (!$ender_data || $ender_data['pos'] > $pos) { - $ender_data = array('pos' => $pos, 'len' => $len, 'dlm' => $ender); + + if ($pos_data['pos'] != strlen($tmp_str) + && $this->_charEscapesChar($ender, + substr($tmp_str, $pos_data['pos'] + 1, 1))) { + // We did not find the ender + geshi_dbg('ender is escaping the next char - ' + . substr($tmp_str, $pos_data['pos'] + 1, 1)); + $current_pos += $pos_data['pos'] + 1 + $pos_data['len']; + $tmp_str = substr($tmp_str, $pos_data['pos'] + 1 + + $pos_data['len']); + continue; + } + else { + geshi_dbg("Not escaped or escaping: Found at position " + . $pos_data['pos']); + if (!$ender_data || $pos_data['pos'] < $ender_data['pos']) { + geshi_dbg('earliest'); + $ender_data['pos'] = $pos_data['pos'] + $current_pos; + $ender_data['dlm'] = $ender; + $ender_data['len'] = $pos_data['len']; } + break; } - - // else, start further up - ++$pos; } } geshi_dbg('Ender data: ' . print_r($ender_data, true)); return ($ender_data) ? $ender_data : false; } + // }}} + // {{{ _charEscapesChar() + /** - * Overrides addParseData to add escape characters also + * Returns true if $escape_char escapes $char_to_escape. + * + * @param string $escape_char The escape character + * @param string $char_to_escape The character being escaped + * @return boolean */ + function _charEscapesChar ($escape_char, $char_to_escape) + { + static $result_table = array(); + if (isset($result_table[$escape_char][$char_to_escape])) { + return $result_table[$escape_char][$char_to_escape]; + } + + foreach ($this->_escapeGroups as $group) { + if (in_array($escape_char, $group[0])) { + return $result_table[$escape_char][$char_to_escape] + = in_array($char_to_escape, $group[1]); + } + } + + return $result_table[$escape_char][$char_to_escape] = false; + } + + // }}} + // {{{ _isEscapeChar() + + /** + * Returns true if $escape_char is an escape character in any group. 
+ * + * @param string $escape_char The escape character + * @return boolean + */ + function _isEscapeChar ($escape_char) + { + static $result_table = array(); + if (isset($result_table[$escape_char])) { + return $result_table[$escape_char]; + } + + foreach ($this->_escapeGroups as $group) { + if (in_array($escape_char, $group[0])) { + return $result_table[$escape_char] = true; + } + } + return $result_table[$escape_char] = false; + } + + // }}} + // {{{ _addParseData() + + /** + * Overrides addParseData to add escape characters also. + * + * @param string $code + * @param string $first_char_of_next_context + */ function _addParseData ($code, $first_char_of_next_context = '') { - geshi_dbg('GeSHiStringContext::_addParseData(' . substr($code, 0, 15) . '...)'); + geshi_dbg('GeSHiStringContext::_addParseData(' . substr($code, 0, 15)); $length = strlen($code); $string = ''; @@ -171,22 +293,30 @@ geshi_dbg('Char: ' . $char); $skip = false; - foreach ($this->_escapeCharacters as $escape_char) { - $len = 1; - if ($char == $escape_char && (false !== ($len = $this->_shouldBeEscaped(substr($code, $i + 1))))) { - geshi_dbg('Match: len = ' . $len); - if ($string) { - $this->_styler->addParseData($string, $this->_contextName, - $this->_getExtraParseData(), $this->_complexFlag); - $string = ''; + foreach ($this->_escapeGroups as $group) { + foreach ($group[0] as $escape_char) { + $len = 1; + if ($char == $escape_char + && (false !== ($len = $this->_shouldBeEscaped( + substr($code, $i + 1), $group[1])))) { + geshi_dbg('Match: len = ' . $len); + if ($string) { + $this->_styler->addParseData($string, + $this->_contextName, + $this->_getExtraParseData(), + $this->_complexFlag); + $string = ''; + } + + $this->_styler->addParseData($escape_char + . substr($code, $i + 1, $len), + "$this->_contextName/$group[2]", + $this->_getExtraParseData(), + $this->_complexFlag); + $i += $len; + $skip = true; + break; } - // Needs a better name than /esc - $this->_styler->addParseData($escape_char . 
substr($code, $i + 1, $len), $this->_contextName . '/esc', - $this->_getExtraParseData(), $this->_complexFlag); - // FastForward - $i += $len; - $skip = true; - break; } } @@ -195,10 +325,14 @@ } } if ($string) { - $this->_styler->addParseData($string, $this->_contextName, $this->_getExtraParseData(), + $this->_styler->addParseData($string, $this->_contextName, + $this->_getExtraParseData(), $this->_complexFlag); } - } + } + + // }}} + // {{{ _shouldBeEscaped() /** * Checks whether the character(s) at the start of the parameter string are @@ -207,16 +341,17 @@ * @param string The string to check the beginning of for escape characters * @return int|false The length of the escape character sequence, else false */ - function _shouldBeEscaped ($code) + function _shouldBeEscaped ($code, $chars_to_escape) { geshi_dbg('Checking: ' . substr($code, 0, 15)); - foreach ($this->_charsToEscape as $match) { + foreach ($chars_to_escape as $match) { if ('REGEX' != substr($match, 0, 5)) { geshi_dbg('Test: ' . $match); if (substr($code, 0, 1) == $match) { return 1; } - } else { + } + else { geshi_dbg(' Testing via regex: ' . $match . '... ', false); $data = geshi_get_position($code, $match, 0); if (0 === $data['pos']) { @@ -229,6 +364,9 @@ // No matches... return false; } + + // }}} + } ?> Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php 2008-05-18 15:24:09 UTC (rev 1166) +++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php 2008-05-18 15:33:07 UTC (rev 1167) @@ -6,10 +6,10 @@ * Author: Nigel McNie * E-mail: ni...@ge... * </pre> - * + * * For information on how to use GeSHi, please consult the documentation * found in the docs/ directory, or online at http://geshi.org/docs/ - * + * * This program is part of GeSHi. 
* * This program is free software; you can redistribute it and/or modify @@ -21,7 +21,7 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA @@ -32,7 +32,7 @@ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL * @copyright (C) 2004 - 2006 Nigel McNie * @version $Id$ - * + * */ $GLOBALS['geshi_dbg'] = false; @@ -52,7 +52,7 @@ /** * Handles debugging by printing a message according to current debug level, * mask of context and other things. - * + * * @param string The message to print out * @param int The context in which this message is to be printed out in - see * the GESHI_DBG_* constants @@ -78,15 +78,15 @@ case '@b': $start = '<span style="font-weight:bold;">'; break; - + case '@i': $start = '<span style="font-style:italic;">'; break; - + case '@o': $start = '<span style="color:green;background-color:#efe;border:1px solid #393;">'; break; - + case '@w': $start = '<span style="color:#660;background-color:#ffe;border:1px solid #993;">'; break; @@ -94,22 +94,22 @@ case '@e': $start = '<span style="color:red;background-color:#fee;border:1px solid #933;">'; break; - + default: $end = ''; } - + if (preg_match('#(.*?)::(.*?)\((.*?)\)#si', $message)) { $start = '<span style="font-weight:bold;">'; $end = '</span>'; } - + if (preg_match('#^@[a-z]#', $message)) { $message = substr($message, 2); } echo $start . htmlspecialchars(str_replace("\n", '', $message)) . $end; if ($add_nl) echo "\n"; - } + } } /** @@ -157,56 +157,148 @@ } /** - * Drop-in replacement for strpos and stripos. Also can handle regular expression + * A replacement for strpos and stripos that can also handle regular expression * string positions. 
- * + * * @param string The string in which to search for the $needle - * @param string The string to search for. If this string starts with "REGEX" then - * a regular expression search is performed. - * @param int The offset in the string in which to start searching + * @param string The string to search for. If this string starts with "REGEX" + * then a regular expression search is performed. + * @param int The offset in the string in which to start searching. Look- + * behind assertions in a regex that refer to characters prior to + * this point will not match. * @param boolean Whether the search is case sensitive or not - * @param boolean Whether the match table is needed (almost never, and it makes things slower) + * @param boolean Whether the match table is needed (almost never, and it makes + * things slower, but probably not noticeably). * @return array An array of data: * <pre> 'pos' => position in string of needle, * 'len' => length of match - * 'tab' => a table of the stuff matched in brackets for a regular expression</pre> + * 'tab' => a tabular array containing the parenthesised sub-matches of a + * regular expression. [0] is the complete match, [1] the first parenthesized + * sub-match, and so on. + * </pre> * @access private */ -function geshi_get_position ($haystack, $needle, $offset = 0, $case_sensitive = false, $need_table = false) +function geshi_get_position ($haystack, $needle, $offset = 0, + $case_sensitive = false, $need_table = false) { if ('REGEX' != substr($needle, 0, 5)) { if (!$case_sensitive) { - return array('pos' => stripos($haystack, $needle, $offset), 'len' => strlen($needle)); + // @todo [blocking 1.1.4] This line is marked by BenBE as + // one of the slowest. If you don't have PHP5 then this is + // done manually, the function for it should probably be + // cleaned up a bit. 
+ return array('pos' => stripos($haystack, $needle, $offset), + 'len' => strlen($needle)); } else { - return array('pos' => strpos($haystack, $needle, $offset), 'len' => strlen($needle)); + return array('pos' => strpos($haystack, $needle, $offset), + 'len' => strlen($needle)); } } - + $regex = substr($needle, 5); + $haystack_offset = substr($haystack, $offset); + $table = array(); + $length = 0; + $flags = PREG_SPLIT_OFFSET_CAPTURE; + if ($need_table) $flags |= PREG_SPLIT_DELIM_CAPTURE; + // @todo [blocking 1.1.4] This line is marked by BenBE as one of the + // slowest. There's not too much that can be done to speed up the line + // per se, but possibly something similar to the "here's a character + // you can check to see if this is ever going to pass" might be useful. + $splits = preg_split($regex, $haystack_offset, 2, $flags); + if (count($splits) > 1) { + $first = array_shift($splits); + $last = array_pop($splits); + $pos = strlen($first[0]); + $length = $last[1] - $pos; + $pos += $offset; + if ($need_table) { + $table[] = substr($haystack_offset, $pos, $length); + foreach ($splits as $match) $table[] = $match[0]; + } + } else $pos = false; + return array('pos' => $pos, 'len' => $length, 'tab' => $table); +} - // Get the location of the first match of the regular expression - $foo = microtime(); - $foo_len = strlen($foo); - $len = strlen($haystack); - $str = preg_replace($regex, $foo, $haystack, 1); - $length = $len - (strlen($str) - $foo_len); +/** + * Which, if any, of the strings in the array $substrs occurs at offset $offset + * in the string $str? + * If $flags contains GESHI_WHICHSS_MAXIMAL, then the largest of multiple + * matches will be returned, otherwise and by default: the first encountered. + * If $flags contains GESHI_WHICHSS_CASEINSENSITIVE then the comparison will be + * case-insensitive; otherwise and by default it will be case-sensitive. 
+ * If $flags contains GESHI_WHICHSS_TRYREGEX then the remaining portion of any + * string in $substrs that starts with 'REGEX' will be treated as a (Perl- + * compatible) regular expression to match, anchored to the start of the string + * at $offset. Look-behind assertions that refer to parts of the string prior + * to $offset will not work. If $flags contains GESHI_WHICHSS_SKIPANCHORINSERT + * then the anchor insertion on each regex in $substr will not be performed - + * it will be assumed to have already been performed but in any case only + * matches at the start of the string will ever be returned. + * @return Null if no match is found, otherwise the matching substring, with + * case as in the $substrs element rather than the matching portion of $str. + */ +define('GESHI_WHICHSS_MAXIMAL', 1); +define('GESHI_WHICHSS_CASEINSENSITIVE', 2); +define('GESHI_WHICHSS_TRYREGEX', 4); +define('GESHI_WHICHSS_SKIPANCHORINSERT', 8); +function geshi_whichsubstr($str, $substrs, $offset = 0, $flags = 0) { + /* Constants */ + static $re_starter_c = 'REGEX'; + static $re_starter_len_c = 5/*strlen($re_starter_c)*/; - // Return match table if requested - if ($need_table) { - $matches = array(); - preg_match_all($regex, $haystack, $matches); - $i = 0; - $table = array(); - foreach ( $matches as $match ) { - $table[$i++] = (isset($match[0])) ? $match[0] : null; + $ret = null; + $max_len = -1; + foreach ($substrs as $substr) { + if (($flags & GESHI_WHICHSS_TRYREGEX) && + strncmp($substr,$re_starter_c,$re_starter_len_c)==0) { + $re = substr($substr, $re_starter_len_c); + if (!($flags & GESHI_WHICHSS_SKIPANCHORINSERT)) { + $re = geshi_anchor_re($re); + } + $haystack = $offset > 0 ? substr($str, $offset) : $str; + $match = preg_match($re, $haystack, $matches, PREG_OFFSET_CAPTURE) ? 
+ $matches[0][0] : null; + $len = strlen($match); + /* This code is reached only if GESHI_WHICHSS_SKIPANCHORINSERT was + * specified without a pre-existing anchor and with a match that + * started beyond $offset. + */ + if ($match !== null && $matches[0][1]) $len = $match = null; + } else { + $len = strlen($substr); + if (!($flags & GESHI_WHICHSS_CASEINSENSITIVE)) { + $match = substr($str,$offset,$len) == $substr ? $substr : null; + } else if (strcasecmp(substr($str, $offset, $len), $substr) == 0) { + $match = $substr; + } else $match = null; } - } else { - $table = array(); + if ($match !== null) { + if (!($flags & GESHI_WHICHSS_MAXIMAL)) { + $ret = $match; + break; + } else if ($len > $max_len) { + $ret = $match; + $max_len = $len; + } + } } - return array('pos' => strpos($str, $foo), 'len' => $length, 'tab' => $table); + return $ret; } /** + * Safely inserts an anchor into the regex $regex so that it only matches at the + * start of the searched string. + * @return string The regex with anchor inserted. 
+ */ +function geshi_anchor_re($regex) { + $delim = $regex{0}; + $endPos = strrpos($regex, $delim); + $endChars = substr($regex, $endPos); + return "$delim^(".substr($regex, 1, $endPos - 1).')'.$endChars; +} + +/** * @todo [blocking 1.1.5] Octal/hexadecimal numbers are common, so should have functions * for those, and make sure that integers/doubles do not collide * @access private @@ -236,9 +328,9 @@ // /** * Replace stripos() - * + * * This function lifted from the PHP_Compat PEAR package, and optimised - * + * * @author Aidan Lister <ai...@ph...>, Nigel McNie <ni...@ge...> * @version $Revision$ * @access private @@ -267,7 +359,7 @@ /** * Returns the GeSHi_Styler object used to help with parsing - * + * * @param boolean $force_new If true, forces the creation of * a new GeSHi_Parser object * @return GeSHi_Styler Copied: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES (from rev 1166, tags/RELEASE_1_1_2_ALPHA3/geshi-src/geshi/languages/c/NOTES) =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES (rev 0) +++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES 2008-05-18 15:33:07 UTC (rev 1167) @@ -0,0 +1,87 @@ +Here are several notes on C highlighting as originally contained as extended +comments within c.php. Mostly this relates to preprocessor-context +highlighting and the situations in which the C parser function +GeSHiCCodeParser::parseToken() adjusts it. + +== (Un)Highlighted keywords in the preprocessor context == + +It might seem questionable at first whether declarator/type/qualifier keywords, +standard functions and standard macros or objects will ever occur, thus +requiring highlighting, within some preprocessor directives - namely #(el)if, +#ifdef, #ifndef and #undef. 
They can and do occur in practice though because +these directives can be used to test whether at preprocessor level the keyword, +type or function in question has been subverted (or for a function, whether +it's been legitimately defined as a macro), and/or to undo or change that +subversion; for #if/#elif, sizeof should be highlighted in any case - it's been +categorised as a standard function for GeSHi's purposes. + +For #(el)if, a type might also appear as the subject of sizeof. + +It's also debatable whether such tokens should be highlighted within #error and +#pragma directives - it seems most appropriate that they are not, because within +those directives their occurrence can be likened to their appearance within a +comment. GeSHiCCodeParser::parseToken() therefore adjusts those contexts; +their highlighting when the parser is disabled is tolerable as a minor glitch. + +It's less debatable that within a #include filename, these keywords should not +be highlighted. That's handled in GeSHiCCodeParser::parseToken() for <> +includes - quoted includes are already protected by the string_literal context +(which parseToken() reclassifies). It's borderline tolerable that this +incorrect highlighting will appear when the parser is disabled. + +Within a #include where the filename is specified by a macro, the only keywords +that should be highlighted out of the list at the top of this section are: +standard macros (because they might be used in a stringising macro "call"), any +standard functions that are implementable as macros (for the same reason), +"sizeof" (because it might be used to generate an argument for a macro "call") +and types (but not qualifiers) where they appear as the subject of sizeof. The +remainder have no meaning in preprocessor macro-"call" context. Separating out +"implementable-as-a-macro" from the other standard functions is a longer-term +future task to complete alongside comprehensively filling out what's missing +from the keyword lists. 
Separating qualifiers from types is another task to +consider. To start with, GeSHiCCodeParser::parseToken() disables highlighting +for the context 'declarator-keyword' within #include:s where the filename is +specified by a macro, and /all/ highlighting is disabled for the macro name +itself - i.e. highlighting applies only to macro arguments. + +The same reasoning of the above paragraph can be applied to the #line directive +where its "arguments" are specified by a macro: GeSHiCCodeParser::parseToken() +similarly disables highlighting in that situation. + +== Symbols in C preprocessor directives == + +Not all of the symbols added by the call: + $context->addSymbolGroup(geshi_c_get_standard_symbols(), + 'c/c/preprocessor/symbol'); +have meaning for all preprocessor directives and in some directives they are +illegal. This GeSHi C module assumes well-formed input code so illegal +occurrences need not concern it. + +In #(el)if directives, any symbol except the semicolon can legally occur. +At first it might seem that & has no place either because at preprocessing stage +no objects exist to take an address of, but & can also act as a bitwise +operator or be part of the logical && operator. Due to the lack of objects it +might also at first seem that [] has no use, however it can be applied to +string literals for esoteric uses in a preprocessor constant such as this +expression equating to 1: +"abcd"[1] == 'b' +A semicolon though is only used to end single statements in code - this can't +apply to a constant preprocessor expression. + +In #include and #line directives, the header filename and new effective source +file name (respectively) may be specified by a macro. A macro may take a +constant preprocessor expression as an argument, so by this reasoning it can be +seen that within #include and #line directives the same set of symbols can +occur as within an #(el)if directive - namely, anything except a semicolon. 
+ +In a #define, even a semicolon can occur because the macro can substitute for +code. + +#ifdef, #ifndef, #undef, #endif and #else do not allow any symbol except by +proxy for comments and line continuation slashes. + +Likewise for #error and #pragma except that any symbol could occur as part of +the subsequent (unquoted) freeform text. These should not be highlighted, and +thus GeSHiCCodeParser::parseToken() recontextualises them so that they aren't +highlighted. Their highlighting when the parser is disabled is tolerable as a +minor glitch. Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php =================================================================== --- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php 2008-05-18 15:24:09 UTC (rev 1166) +++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php 2008-05-18 15:33:07 UTC (rev 1167) @@ -54,33 +54,34 @@ $context->addChild('multi_comment'); $context->addChild('single_comment'); $context->addChild('string_literal', 'string'); + $context->addChild('widestring_literal', 'string'); $context->addChild('character_constant', 'singlechar'); + $context->addChild('widecharacter_constant', 'singlechar'); $context->addChild('preprocessor', 'code'); - + $context->addKeywordGroup(geshi_c_get_ctlflow_keywords(), 'ctlflow-keyword', true, geshi_c_get_ctlflow_keywords_url()); $context->addKeywordGroup(geshi_c_get_declarator_keywords(), 'declarator-keyword', true, geshi_c_get_declarator_keywords_url()); - + $context->addKeywordGroup(geshi_c_get_types_and_qualifiers(), 'typeorqualifier', true, geshi_c_get_types_and_qualifiers_url()); - + $context->addKeywordGroup(geshi_c_get_standard_functions(), 'stdfunction', true, geshi_c_get_standard_functions_url()); - + $context->addKeywordGroup(geshi_c_get_standard_macros_and_objects(), 'stdmacroorobject', true, geshi_c_get_standard_macros_and_objects_url()); - + $context->addSymbolGroup(geshi_c_get_standard_symbols(), 'symbol'); - + 
$context->useStandardIntegers(); $context->useStandardDoubles(array('chars_after_number' => array('f','l'))); - + $context->addObjectSplitter(geshi_c_get_structure_access_symbols(), 'member', 'symbol'); $context->setComplexFlag(GESHI_COMPLEX_TOKENISE); - } function geshi_c_c_multi_comment (&$context) @@ -91,45 +92,60 @@ function geshi_c_c_single_comment (&$context) { - $context->addDelimiters('//', 'REGEX#(?<!\\\)\n#'); + $context->addDelimiters('//', "\n"); $context->setComplexFlag(GESHI_COMPLEX_PASSALL); - // without this, detection of a following preprocessor directive is - // inhibited [due to changes this might no longer apply] + /* Without this, and when the comment occurs at the end of a preprocessor + * directive, any immediately subsequent preprocessor directive is treated + * as a continuation of the first one. */ $context->parseDelimiters(GESHI_CHILD_PARSE_LEFT); } +/* A (wide)string literal may be continued to the next line through the use of a + * trailing \ but otherwise multiline strings are illegal. This code doesn't + * attempt to mark erroneous multiline strings, and slash-continuation is + * handled generically in GeSHiCCodeParser::parseToken(). This code does + * terminate strings on newlines though due to the legality of the appearance + * of unmatched double quote marks in #error and #pragma directives. + * GeSHiCCodeParser::parseToken() later unhighlights such unterminated strings + * but they can't be allowed to continue over the line otherwise the + * #error/#pragma directive will be incorrectly continued over multiple lines + * prior to GeSHiCCodeParser receiving it. + */ function geshi_c_c_string_literal (&$context) { - /* - * A string literal may be continued to the next line with a trailing \ but - * otherwise multiline strings are illegal; we don't attempt to mark that - * error here though.
- */ - $context->addDelimiters('"', '"'); - - $context->setEscapeCharacters('\\'); - /** @todo string literals and character constants may be immediately - * preceded by a capital L to indicate a wide-character constant and it - * would be nice to include that in the highlighting. - */ - $context->setCharactersToEscape(array("'", '?', 'a', 'b', 'f', - 'v', 'n', 'r', 't', 'REGEX#[0-7]{1,3}#', - 'REGEX#x[0-9a-f]{1,}#i', '\\', '"')); + geshi_c_base_string($context, '"', array('"', 'REGEX#(?=\n)#'), false); } - +function geshi_c_c_widestring_literal (&$context) +{ + geshi_c_base_string($context, 'L"', array('"', 'REGEX#(?=\n)#'), true); +} function geshi_c_c_character_constant (&$context) { - $context->addDelimiters("'", "'"); + geshi_c_base_singlechar($context, "'", "'", false); + $context->setDisallowEmptyChars(); +} +function geshi_c_c_widecharacter_constant (&$context) +{ + geshi_c_base_singlechar($context, "L'", "'", true); + $context->setDisallowEmptyChars(); +} - $context->setEscapeCharacters('\\'); +function geshi_c_base_string (&$context, $delim_start, $delim_end, $delim_cs) { + $context->addDelimiters($delim_start, $delim_end, $delim_cs); + $context->addEscapeGroup('\\', array("'", '"', '?', '\\', 'a', 'b', 'f', + 'n', 'r', 't', 'v', 'REGEX#([0-7]{1,3}|x[0-9a-f]{1,})#i')); + $context->setComplexFlag(GESHI_COMPLEX_PASSALL); +} - /** @todo same todo as for geshi_c_c_string_literal(). 
*/ - $context->setCharactersToEscape(array("'", '?', 'a', 'b', 'f', - 'v', 'n', 'r', 't', 'REGEX#[0-7]{1,3}#', - 'REGEX#x[0-9a-f]{1,}#i', '\\', '"')); +function geshi_c_base_singlechar (&$context, $delim_start, $delim_end, $delim_cs) { + $context->addDelimiters($delim_start, $delim_end, $delim_cs); + $context->setEscapeCharacters('\\'); + $context->setCharactersToEscape(array("'", '"', '?', '\\', 'a', 'b', 'f', + 'n', 'r', 't', 'v', 'REGEX#([0-7]{1,3}|x[0-9a-f]{1,})#i')); + $context->setComplexFlag(GESHI_COMPLEX_PASSALL); } -/** +/* * Duplicate these functions for the preprocessor simply so that they can have * a different highlighting context. */ @@ -143,127 +159,60 @@ } function geshi_c_c_preprocessor_string_literal (&$context) { - geshi_c_c_string_literal($context); + geshi_c_c_string_literal ($context); } +function geshi_c_c_preprocessor_widestring_literal (&$context) +{ + geshi_c_c_widestring_literal ($context); +} function geshi_c_c_preprocessor_character_constant (&$context) { geshi_c_c_character_constant ($context); } +function geshi_c_c_preprocessor_widecharacter_constant (&$context) +{ + geshi_c_c_widecharacter_constant ($context); +} function geshi_c_c_preprocessor (&$context) { - /** - * A preprocessing directive beginning with a # must occur at the start + /* A preprocessing directive beginning with a # must occur at the start * of a line, but may optionally be preceded by whitespace. The hash may - * optionally be followed by whitespace in the same manner, after which - * the actual directive keyword is specified. Finally though, a hash - * without a following directive is allowed as a 'null directive'. + * optionally be followed by whitespace, after which the actual directive + * keyword is specified. Finally though, a hash without a following + * directive is allowed as a 'null directive'. 
* * There is also a single preprocessing directive (_Pragma) that follows - * the same rules but is not preceded by a hash + * the same rules but that is not preceded by a hash * * The list of non-newline whitespace characters recognised by C and - * used in the r.e. below is: [ \t\f\v] - ... [truncated message content] |