[GeSHi-commit] SF.net SVN: geshi: [1167] branches/RELEASE_1_1_X_DEVEL/geshi-src

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 1167
          http://geshi.svn.sourceforge.net/geshi/?rev=1167&view=rev
Author:   benbe
Date:     2008-05-18 08:33:07 -0700 (Sun, 18 May 2008)

Log Message:
-----------
add: Building up GeSHi 1.1.X development branch using old releases

Modified Paths:
--------------
    branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/class.geshiccodeparser.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/common.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/codeworker/codeworker.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/css/css.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/delphi/common.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/delphi/delphi.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/java/java.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/javascript/javascript.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/php/class.geshiphpcodeparser.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/php/common.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/sql/sql.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/vhdl/vhdl.php
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/themes/default/c/c.php

Added Paths:
-----------
    branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES

Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php
===================================================================

--- branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php	2008-05-18 15:24:09 UTC (rev 1166)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/class.geshi.php	2008-05-18 15:33:07 UTC (rev 1167)
@@ -42,7 +42,7 @@
 $geshi_old_reporting_level = error_reporting(E_ALL);
 
 /** GeSHi Version */
-define('GESHI_VERSION', '1.1.2alpha2');
+define('GESHI_VERSION', '1.1.2alpha3');
 
 /** Set the correct directory separator */
 define('GESHI_DIR_SEP', ('WIN' != substr(PHP_OS, 0, 3)) ? '/' : '\\');
@@ -318,7 +318,7 @@
      *   language. If caching of the root context is enabled, then this time will likely
      *   be close to zero if you are calling this method after second and subsequent calls
      *   to {@link GeSHi::parseCode()}.</li>
-     *   <li>If youpass <b>'parse'</b>, you will get the time it took to parse the last
+     *   <li>If you pass <b>'parse'</b>, you will get the time it took to parse the last
      *   time {@link GeSHi::parseCode()} was called.
      * </ul>
      *
@@ -382,6 +382,43 @@
     }
     
     // }}}
+    // {{{ getSupportedLanguages()
+
+    /**
+     * @todo document this function
+     * @todo This and other methods share a lot of directory traversal
+     * functionality, which could be split out somehow.
+     * @todo actually, this should be implemented using a registry
+     */
+    function getSupportedLanguages ($return_human = false)
+    {
+        $languages = array();
+
+        $ignore = array('.', '..', 'CVS');
+        $dh = opendir(GESHI_LANGUAGES_ROOT);
+        while (false !== ($dir = readdir($dh))) {
+            if (in_array($dir, $ignore) || is_file(GESHI_LANGUAGES_ROOT . $dir)) continue;
+            // Check the directory for the dialect files
+            $ldh = opendir(GESHI_LANGUAGES_ROOT . $dir);
+            while (false !== ($file = readdir($ldh))) {
+                if (in_array($file, $ignore) || is_dir(GESHI_LANGUAGES_ROOT . "$dir/$file") || substr($file, -4) != '.php') continue;
+                
+                // Found a language file
+                $file = substr($file, 0, -4);
+                if ('common' == $file || 'class' == substr($file, 0, 5)) continue;
+
+                if ($return_human) {
+                    $languages["$dir/$file"] = GeSHi::getHumanLanguageName("$dir/$file");
+                } else {
+                    $languages[] = "$dir/$file";
+                }
+            }
+        }
+
+        return $languages;
+    }
+
+    // }}}  
     // {{{ getSupportedThemes()
     
     /**
@@ -504,6 +541,26 @@
     }
     
     // }}}
+    // {{{ getHumanLanguageName()
+    
+    /**
+     * Given a language name, return a human version of it
+     * 
+     * @param  string $language The language name to get the human version of
+     * @return string The human language name, or <kbd>false</kbd> if the
+     *                language does not exist
+     * @static
+     * @todo actually implement this function
+     * @since 1.1.2
+     */
+    function getHumanLanguageName ($language)
+    {
+        $human_name = '';
+        $language = GeSHi::_clean($language);
+        return $language;
+    }
+    
+    // }}}
     // {{{ getHumanThemeName()
     
     /**

Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php
===================================================================
--- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php	2008-05-18 15:24:09 UTC (rev 1166)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshicontext.php	2008-05-18 15:33:07 UTC (rev 1167)
@@ -786,8 +786,6 @@
                 $length   = $data['len'];
                 if (isset($data['tab'])) {
                     geshi_dbg('Table: ' . print_r($data['tab'], true));
-                    $this->_startRegexTable = $data['tab'];
-                    $delimiter = $data['tab'][0];
                 }
                 
                 if (false !== $position) {
@@ -798,6 +796,10 @@
                         $first_position = $position;
                         $first_length   = $length;
                         $first_key      = $key;
+                        if (isset($data['tab'])) {
+                            $this->_startRegexTable = $data['tab'];
+                            $delimiter = $data['tab'][0];
+                        }
                         $first_dlm      = $delimiter;
                     }
                 } else {

Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php
===================================================================
--- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php	2008-05-18 15:24:09 UTC (rev 1166)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshisinglecharcontext.php	2008-05-18 15:33:07 UTC (rev 1167)
@@ -6,10 +6,10 @@
  *   Author: Nigel McNie
  *   E-mail: ni...@ge...
  * </pre>
- * 
+ *
  * For information on how to use GeSHi, please consult the documentation
  * found in the docs/ directory, or online at http://geshi.org/docs/
- * 
+ *
  * This program is part of GeSHi.
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -21,160 +21,236 @@
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
- * 
+ *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  *
  * @package    geshi
  * @subpackage core
- * @author     Nigel McNie <ni...@ge...>
+ * @author     Nigel McNie <ni...@ge...>;
+ *             http://clc-wiki.net/wiki/User:Netocrat
  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL
  * @copyright  (C) 2004 - 2006 Nigel McNie
  * @version    $Id$
- * 
+ *
  */
 
 /**
  * The GeSHiSingleCharContext class. This class extends GeSHiContext to handle
  * "single character" strings - strings that are only one character long, like
  * in java.
- * 
- * Note that this functionality assumes that the delimiters for single character
- * contexts are just one character long (a sensible assumption made for speed
- * reasons). If required in the future this class could support longer delimiters.
- *  
+ *
+ * Escape sequences need not be limited to one character and may be REGEX-
+ * specified, to allow for situations such as C's octal and hexadecimal escapes,
+ * e.g. '\xFF'.  Likewise for the start and end delimiter, and the escape
+ * "character".  This is handy for situations such as C's widestring
+ * characters, which are prefixed by an L.
+ *
  * @package    geshi
  * @subpackage core
- * @author     Nigel McNie <ni...@ge...>
+ * @author     Nigel McNie <ni...@ge...>; Netocrat
  * @since      1.1.1
  * @version    $Revision$
  * @see        GeSHiContext
  */
 class GeSHiSingleCharContext extends GeSHiContext
 {
-    
+
     // {{{ properties
-    
+
     /**#@-
      * @access private
      */
+    /** The parsed data when getContextStartData() is successful. */
+    var $_characterLen;
+    var $_endDelimiterLen;
+    var $_isEscapeSeq;
+
+    var $_disallowEmpty;
+
+    /** Characters that start an escape sequence... */
     var $_escapeCharacters;
-    
-    // Characters that should be escaped
+    /** ...and the valid escape sequences that can follow. */
     var $_charsToEscape;
-        
+
     /**#@-*/
-    
+
     // }}}
     // {{{ setEscapeCharacters()
-    
+    /**
+     * Specifies each "character" that should be interpreted as the start of an
+     * escape sequence when it occurs immediately subsequent to a start
+     * delimiter.  Each "character" may be greater than one actual character in
+     * length, and may optionally be specified by a REGEX-string - look-behind
+     * assertions on such regexes are not supported.
+     * @param Mixed Array of strings or single string.
+     */
     function setEscapeCharacters ($chars)
     {
         $this->_escapeCharacters = (array) $chars;
     }
-    
+
     // }}}
     // {{{ setCharactersToEscape()
-    
+    /**
+     * Specifies all escape sequences that are valid following any of the
+     * escape characters.  Each escape sequence may be greater than one
+     * character in length and may be specified by a REGEX-string - look-behind
+     * assertions on such regexes are not supported.
+     * @param Mixed Array of strings or single string.
+     */
     function setCharactersToEscape ($chars)
     {
-        $this->_charsToEscape = (array) $chars;
+        static $re_starter_c = 'REGEX';
+        static $re_starter_len_c = 5/*strlen($re_starter_c)*/;
+        $this->_charsToEscape = array();
+        /* Save a little time and processing by anchoring all regexes now,
+         * rather than each time geshi_whichsubstr() is called.
+         */
+        foreach ((array)$chars as $escSeq) {
+            if (strncmp($escSeq, $re_starter_c, $re_starter_len_c) == 0) {
+                $re = substr($escSeq, $re_starter_len_c);
+                $re = geshi_anchor_re($re);
+                $this->_charsToEscape[] = $re_starter_c.$re;
+            } else $this->_charsToEscape[] = $escSeq;
+        }
     }
-    
+
     // }}}
+    // {{{ setDisallowEmptyChars()
+    /**
+     * Call this to specify whether to disallow empty characters - e.g. in C, ''
+     * is invalid.  By default empty characters are allowed.  The default
+     * parameter value of this function is true so e.g. in C's case this can be
+     * called simply as $context->setDisallowEmptyChars().
+     * @param boolean $value Defaults to true.
+     */
+    function setDisallowEmptyChars ($value = true)
+    {
+        $this->_disallowEmpty = $value;
+    }
+
+    // }}}
     // {{{ getContextStartData()
     /**
      * GetContextStartData
-     * 
-     * Overridden to check if this context should even start. If we can't find
-     * a valid end-of-string character in the correct place this context should
-     * not even start.
-     * 
+     *
+     * Overrides the parent method to check whether this context should even
+     * start.  Checks for a complete character including start and end
+     * delimiters and valid contained character, which might be an escape
+     * sequence.  Stores all data found so that it may be used by
+     * _getContextEndData() and _addParseData(), to avoid reparsing.
+     *
      * @param string $code
      * @param string $start_of_context
      */
     function getContextStartData ($code, $start_of_context)
     {
-        geshi_dbg('GeSHiSingleCharContext::getContextStartData(' . $this->_contextName . ', ' . $start_of_context . ')');
-        
+        geshi_dbg('GeSHiSingleCharContext::getContextStartData(' .
+          $this->_contextName . ', ' . $start_of_context . ')');
+
         $offset = 0;
+        $data = null;
         while (true) {
+            /* For retries, strip to just past the last failed start. */
+            if ($data != null) {
+                $code = substr($code, $data['pos'] + 1);
+                $offset += $data['pos'] + 1;
+            }
+
             $data = parent::getContextStartData($code, $start_of_context);
-            
-            // First, if no match then bail
-            if (-1 === $data['pos']) {
-                return $data;
-            }
-            
-            $first_position = $data['pos'];
-            $first_length   = $data['len'];
-            $first_key      = $data['key'];
-            $first_dlm      = $data['dlm'];
-            
-            // Check for empty character
-            // WARN: claim here that delimiters are only one char long!
-            if (in_array(substr($code, $first_position + 1, 1), $this->_contextDelimiters[$first_key][1])) {
-                // Nothing wrong with this
+
+            /* First, if no match then bail */
+            if (-1 === $data['pos']) break;
+
+            /* Check for empty character */
+            $end_delim = geshi_whichsubstr($code, $this->_contextDelimiters[
+              $data['key']][1], $data['pos'] + $data['len'],
+              GESHI_WHICHSS_MAXIMAL|GESHI_WHICHSS_TRYREGEX);
+            if ($end_delim !== null) {
+                if (!$this->_disallowEmpty) {
+                    $data['pos'] += $offset;
+                    $this->_characterLen = 0;
+                    $this->_endDelimiterLen = strlen($end_delim);
+                    $this->_isEscapeSeq = false;
+                    break;
+                } else {
+                    /* Support a (hypothetical) syntax where empty characters
+                     * are not permitted but where the end delimiter doubles as
+                     * an escape character. */
+                    $empty = true;
+                }
+            } else $empty = false;
+
+            /* Check for the start of an escape sequence */
+            $esc_start = geshi_whichsubstr($code, $this->_escapeCharacters,
+              $data['pos'] + $data['len'], GESHI_WHICHSS_MAXIMAL|
+              GESHI_WHICHSS_TRYREGEX);
+            $esc_len = strlen($esc_start);
+            if ($esc_start !== null) {
+                /* Check for a valid full escape sequence; allow regexes
+                 * that match sequences of length > 1.  Match the most
+                 * inclusive char/regex. */
+                $start = $data['pos'] + $data['len'] + $esc_len;
+                $esc_seq = geshi_whichsubstr($code, $this->_charsToEscape,
+                  $start, GESHI_WHICHSS_MAXIMAL|GESHI_WHICHSS_TRYREGEX|
+                  GESHI_WHICHSS_SKIPANCHORINSERT);
+                if ($esc_seq === null) continue;
+                else $char_len = $esc_len + strlen($esc_seq);
+            } else if ($empty) continue;
+            else $char_len = 1; /* Possible single unescaped character */
+
+            $final_char_offset = $data['len'] + $char_len;
+
+            /* Check for an end delimiter and if found, return successfully */
+            $end_delim = geshi_whichsubstr($code, $this->_contextDelimiters[
+              $data['key']][1], $data['pos'] + $final_char_offset,
+              GESHI_WHICHSS_MAXIMAL|GESHI_WHICHSS_TRYREGEX);
+            if ($end_delim !== null) {
                 $data['pos'] += $offset;
-                return $data;
+                $this->_characterLen = $char_len;
+                $this->_endDelimiterLen = strlen($end_delim);
+                $this->_isEscapeSeq = ($esc_start !== null);
+                break;
             }
-            
-            // Check for single alone character
-            $final_char_offset = (in_array(substr($code, $first_position + 1, 1), $this->_escapeCharacters))
-                ? 3 : 2;
-            if (in_array(substr($code, $first_position + $final_char_offset, 1),
-                $this->_contextDelimiters[$first_key][1])) {
-                $data['pos'] += $offset;
-                return $data;
-            }
-            
-            
-            // End: strip to just past where the character failed to start and try again
-            $code = substr($code, $first_position + 1);
-            $offset += $first_position + 1;
         }
-        
-        return array('pos' => $first_position, 'len' => $first_length,
-                     'key' => $first_key, 'dlm' => $first_dlm);
+        return $data;
     }
-    
+
     // }}}
     // {{{ _getContextEndData()
-    
+
     /**
-     * In this case we don't need to worry about much because we have made sure in
-     * _getContextStartData that we are starting in the right place.
+     * In this case we don't need to worry about much because we have made sure
+     * in _getContextStartData that we are starting in the right place.
      */
-    function _getContextEndData ($code, $context_open_key, $context_opener, $beginning_of_context)
+    function _getContextEndData ($code, $context_open_key, $context_opener,
+      $beginning_of_context)
     {
-        $pos = 1;
-        $first_char = substr($code, 0, 1);
-        if (in_array($first_char, $this->_escapeCharacters)) {
-            $pos = 2;
-        } elseif (in_array($first_char, $this->_contextDelimiters[$context_open_key][1])) {
-            $pos = 0;
-        }
-        return array('pos' => $pos, 'len' => 1 /*see WARN above*/, 'dlm' => '');
+        return array('pos' => $this->_characterLen,
+                     'len' => $this->_endDelimiterLen,
+                     'dlm' => '');
     }
-    
+
     // }}}
     // {{{ _addParseData()
-    
+
     /**
      * Overrides _addParseData to add escape characters also
      */
     function _addParseData ($code, $first_char_of_next_context = '')
     {
-        geshi_dbg('GeSHiSingleCharContext::_addParseData(' . substr($code, 0, 15) . '...)');       
-        if (in_array(substr($code, 0, 1), $this->_escapeCharacters)) {
+        geshi_dbg('GeSHiSingleCharContext::_addParseData(' .
+          substr($code, 0, 15) . '...)');
+        if ($this->_isEscapeSeq) {
             $this->_styler->addParseData($code, $this->_contextName . '/esc',
                 $this->_getExtraParseData(), $this->_complexFlag);
         } else {
             parent::_addParseData($code, $first_char_of_next_context);
         }
     }
-    
+
     // }}}
 
 }

Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php
===================================================================
--- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php	2008-05-18 15:24:09 UTC (rev 1166)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/classes/class.geshistringcontext.php	2008-05-18 15:33:07 UTC (rev 1167)
@@ -54,115 +54,237 @@
     /**#@-
      * @access private
      */
-    var $_escapeCharacters = array();
     
-    // Characters that should be escaped
-    var $_charsToEscape = array();
+    /**
+     * Escape character groups.
+     * 
+     * @var array
+     */
+    var $_escapeGroups = array();
     
     /**#@-*/
     
     // }}}
-    // {{{ setEscapeCharacters()
+    // {{{ addEscapeGroup()
     
     /**
-     * Sets the characters that are used to escape other characters in a string
+     * Adds an escape group to this string context.
+     * 
+     * An escape group consists of a group of characters that are escape
+     * characters, and another group of characters or regexes that are
+     * the characters to escape. You can also specify a context name for
+     * the escaped characters.
+     *
+     * The escape characters MUST be one character in length, and are
+     * automatically assumed to escape themselves.
+     * 
+     * @param mixed  $escape_characters    The characters that escape others
+     * @param mixed  $characters_to_escape The characters/regexes that are
+     *                                     escaped
+     * @param string $context_name         A name for the escaped characters
      */
-    function setEscapeCharacters ($chars)
+    function addEscapeGroup ($escape_characters,
+        $characters_to_escape = array(), $context_name = 'esc')
     {
-        $this->_escapeCharacters = (array) $chars;
+        // Sanity checking
+        $escape_characters = (array) $escape_characters;
+        $characters_to_escape = (array) $characters_to_escape;
+        foreach ($escape_characters as $char) {
+            if (strlen($char) != 1) {
+                trigger_error('GeSHiStringContext::addEscapeGroup(): malformed'
+                    . ' language file: cannot have escape characters that are'
+                    . ' longer than one character in length');
+            }
+            if (!in_array($char, $characters_to_escape)) {
+                $characters_to_escape[] = $char;
+            }
+        }
+
+        $this->_escapeGroups[] = array(
+            $escape_characters,
+            $characters_to_escape,
+            $context_name
+        );
     }
     
     // }}}
-    // {{{ setCharactersToEscape()
+    // {{{ _getContextEndData()
     
-    function setCharactersToEscape ($chars)
-    {
-        $this->_charsToEscape = (array) $chars;
-    }
-    
-    // }}}
-    
     /**
-     * GetContextEndData
+     * Finds the end of a string context, taking the escape characters into
+     * account.
+     * 
+     * @param string $code             The code to look for the end of the
+     *                                 context in
+     * @param int    $context_open_key The key in the array of delimiters
+     *                                 which corresponds to the opener
+     * @param string $context_opener   The actual opener for the string
      */
     function _getContextEndData ($code, $context_open_key, $context_opener)
     {
-        geshi_dbg('GeSHiStringContext::_getContextEndData(' . $this->_contextName . ', ' . $context_open_key . ', ' . $context_opener . ')');
+        geshi_dbg('GeSHiStringContext::_getContextEndData('
+            . $this->_contextName . ')');
         $this->_lastOpener = $context_opener;
         $ender_data = array();
         
         foreach ($this->_contextDelimiters[$context_open_key][1] as $ender) {
-            geshi_dbg('  Checking ender: ' . $ender);
-
             // Prepare ender regexes if needed
             $ender = $this->_substitutePlaceholders($ender);
-            geshi_dbg('  ender after substitution: ' . $ender);
+            geshi_dbg('  Checking ender: ' . str_replace("\n", '\n', $ender));
 
-            $pos = 0;
+            $tmp_str = $code;
+            $current_pos = 0;
+
             while (true) {
-                $pos = geshi_get_position($code, $ender, $pos);
-                if (false === $pos) {
+                geshi_dbg("@btop of loop; current_pos = $current_pos; str="
+                    . substr($tmp_str, 0, 10));
+                $pos_data = geshi_get_position($tmp_str, $ender);
+                if (false === $pos_data['pos']) {
+                    geshi_dbg("could not find ender $ender in string "
+                        . substr($tmp_str, 0, 10));
                     break;
                 }
-                $len = $pos['len']; 
-                $pos = $pos['pos'];
-                
-                $possible_string = substr($code, 0, $pos);            
-                geshi_dbg('  String might be: ' . $possible_string);
-                
-                $not_escaped = true;
-                if ($this->_escapeCharacters) {
-                    foreach ($this->_escapeCharacters as $escape_char) {
-                        // remove escaped escape characters
-                        $possible_string = str_replace($escape_char . $escape_char, '', $possible_string);
-                    }
-                    
-                    geshi_dbg('  String with double escapes removed: ' . $possible_string);
+                geshi_dbg("found ender $ender at position " . $pos_data['pos']);
 
-                    foreach ($this->_escapeCharacters as $escape_char) {
-                        if (substr($possible_string, -1) == $escape_char) {
-                            $not_escaped = false;
-                            break;
+                // While we may have found an ender, it might be escaped.
+                // Finding out for sure whether it is escaped is harder than
+                // it may initially seem - we have to check each previous
+                // character to see if it escapes the one after it, and flip
+                // a flag which detects whether the initial character is
+                // escaped, or whether the character before the initial
+                // character is escaped (and thus the ender we found is the
+                // real thing).
+                $i = $pos_data['pos'] - 1;
+                if ($i >= 0) {
+                    $current_char = substr($tmp_str, $i, 1);
+                    $after_char   = substr($tmp_str, $i + 1, 1);
+                    geshi_dbg("checking char $current_char to see if it"
+                       . " escapes the char $after_char");
+                    if ($this->_charEscapesChar($current_char, $after_char)) {
+                        geshi_dbg("  it does! Might not have found the ender");
+                        $found_ender = true;
+                        geshi_dbg('checking whether ' . substr($tmp_str, $i, 1)
+                            . ' escapes ' . substr($tmp_str, $i + 1, 1));
+                        while (($i == 0 && $this->_isEscapeChar(substr($tmp_str, $i, 1))) ||
+                            $i > 0
+                            && $this->_charEscapesChar(substr($tmp_str, $i, 1),
+                                substr($tmp_str, $i + 1, 1))) {
+                            $found_ender = !$found_ender;
+                            if (0 == $i) {
+                                geshi_dbg('reached start of string and char is escape');
+                            } else {
+                                geshi_dbg(substr($tmp_str, $i, 1) . ' escapes '
+                                . substr($tmp_str, $i + 1, 1) . ': found_ender='
+                                . $found_ender);
+                            }
+                            --$i;
                         }
-                        
-                        if ($escape_char == $ender
-                            && substr($code, $pos + 1, 1) == $escape_char) {
-                            // We have encountered the case where a string
-                            // has its own ender as a delimiter and as an
-                            // escape character
-                            $not_escaped = false;
-                            break;
+                        geshi_dbg('finished: found_ender=' . $found_ender);
+                        if (!$found_ender) {
+                            geshi_dbg('we did NOT find ender, it was escaped');
+                            $current_pos += $pos_data['pos'] + 1;
+                            $tmp_str = substr($tmp_str, $pos_data['pos'] + 1);
+                            continue;
                         }
+                        geshi_dbg('Found ender since the last char is escaped');
                     }
+                    else {
+                        geshi_dbg(" does  not seem to escape the next char");
+                    }
                 }
-                
-                if ($not_escaped) {
-                    // We may have found the correct ender. If we haven't, then this string
-                    // never ends and we will set the end position to the length of the code
-                    // substr($code, $pos, 1) == $ender
-                    $endpos = geshi_get_position($code, $ender, $pos);
-                    geshi_dbg('  position of ender: ' . $endpos['pos']);
-                    $pos = (false !== $pos && $endpos['pos'] === $pos) ? $pos : strlen($code);
-                    if (!$ender_data || $ender_data['pos'] > $pos) {
-                        $ender_data = array('pos' => $pos, 'len' => $len, 'dlm' => $ender);
+
+                if ($pos_data['pos'] != strlen($tmp_str)
+                    && $this->_charEscapesChar($ender,
+                        substr($tmp_str, $pos_data['pos'] + 1, 1))) {
+                    // We did not find the ender
+                    geshi_dbg('ender is escaping the next char - '
+                        . substr($tmp_str, $pos_data['pos'] + 1, 1));
+                    $current_pos += $pos_data['pos'] + 1 + $pos_data['len'];
+                    $tmp_str = substr($tmp_str, $pos_data['pos'] + 1
+                        + $pos_data['len']);
+                    continue;
+                }
+                else {
+                    geshi_dbg("Not escaped or escaping: Found at position "
+                        . $pos_data['pos']);
+                    if (!$ender_data || $pos_data['pos'] < $ender_data['pos']) {
+                        geshi_dbg('earliest');
+                        $ender_data['pos'] = $pos_data['pos'] + $current_pos;
+                        $ender_data['dlm'] = $ender;
+                        $ender_data['len'] = $pos_data['len'];
                     }
+                    
                     break;
                 }
-                
-                // else, start further up
-                ++$pos;
             }
         }
         geshi_dbg('Ender data: ' . print_r($ender_data, true));
         return ($ender_data) ? $ender_data : false;
     }
     
+    // }}}
+    // {{{ _charEscapesChar()
+
     /**
-     * Overrides addParseData to add escape characters also
+     * Returns true if $escape_char escapes $char_to_escape.
+     *
+     * @param string $escape_char    The escape character
+     * @param string $char_to_escape The character being escaped
+     * @return boolean
      */
+    function _charEscapesChar ($escape_char, $char_to_escape)
+    {
+        static $result_table = array();
+        if (isset($result_table[$escape_char][$char_to_escape])) {
+            return $result_table[$escape_char][$char_to_escape];
+        }
+
+        foreach ($this->_escapeGroups as $group) {
+            if (in_array($escape_char, $group[0])) {
+                return $result_table[$escape_char][$char_to_escape]
+                    = in_array($char_to_escape, $group[1]);
+            }
+        }
+
+        return $result_table[$escape_char][$char_to_escape] = false;
+    }
+
+    // }}}
+    // {{{ _isEscapeChar()
+
+    /**
+     * Returns true if $escape_char is an escape character in any group.
+     *
+     * @param string $escape_char The escape character
+     * @return boolean
+     */
+    function _isEscapeChar ($escape_char)
+    {
+        static $result_table = array();
+        if (isset($result_table[$escape_char])) {
+            return $result_table[$escape_char];
+        }
+
+        foreach ($this->_escapeGroups as $group) {
+            if (in_array($escape_char, $group[0])) {
+                return $result_table[$escape_char] = true;
+            }
+        }
+        return $result_table[$escape_char] = false;
+    }
+
+    // }}}
+    // {{{ _addParseData()
+    
+    /**
+     * Overrides addParseData to add escape characters also.
+     * 
+     * @param string $code
+     * @param string $first_char_of_next_context
+     */
      function _addParseData ($code, $first_char_of_next_context = '')
      {
-        geshi_dbg('GeSHiStringContext::_addParseData(' . substr($code, 0, 15) . '...)');
+        geshi_dbg('GeSHiStringContext::_addParseData(' . substr($code, 0, 15));
         
         $length = strlen($code);
         $string = '';
@@ -171,22 +293,30 @@
             geshi_dbg('Char: ' . $char);
             $skip = false;
             
-            foreach ($this->_escapeCharacters as $escape_char) {
-                $len = 1;
-                if ($char == $escape_char && (false !== ($len = $this->_shouldBeEscaped(substr($code, $i + 1))))) {
-                    geshi_dbg('Match: len = ' . $len);
-                    if ($string) {
-                        $this->_styler->addParseData($string, $this->_contextName,
-                            $this->_getExtraParseData(), $this->_complexFlag);
-                        $string = '';
+            foreach ($this->_escapeGroups as $group) {
+                foreach ($group[0] as $escape_char) {
+                    $len = 1;
+                    if ($char == $escape_char 
+                        && (false !== ($len = $this->_shouldBeEscaped(
+                            substr($code, $i + 1), $group[1])))) {
+                        geshi_dbg('Match: len = ' . $len);
+                        if ($string) {
+                            $this->_styler->addParseData($string,
+                                $this->_contextName,
+                                $this->_getExtraParseData(),
+                                $this->_complexFlag);
+                            $string = '';
+                        }
+
+                        $this->_styler->addParseData($escape_char
+                            . substr($code, $i + 1, $len),
+                             "$this->_contextName/$group[2]",
+                             $this->_getExtraParseData(),
+                             $this->_complexFlag);
+                        $i += $len;
+                        $skip = true;
+                        break;
                     }
-                    // Needs a better name than /esc
-                    $this->_styler->addParseData($escape_char . substr($code, $i + 1, $len), $this->_contextName . '/esc',
-                        $this->_getExtraParseData(), $this->_complexFlag);
-                    // FastForward
-                    $i += $len;
-                    $skip = true;
-                    break;
                 }
             }
             
@@ -195,10 +325,14 @@
             }
         }
         if ($string) {
-            $this->_styler->addParseData($string, $this->_contextName, $this->_getExtraParseData(),
+            $this->_styler->addParseData($string, $this->_contextName,
+                $this->_getExtraParseData(),
                 $this->_complexFlag);
         }
-     }
+    }
+    
+    // }}}
+    // {{{ _shouldBeEscaped()
      
     /**
      * Checks whether the character(s) at the start of the parameter string are
@@ -207,16 +341,17 @@
      * @param string The string to check the beginning of for escape characters
      * @return int|false The length of the escape character sequence, else false
      */
-    function _shouldBeEscaped ($code)
+    function _shouldBeEscaped ($code, $chars_to_escape)
     {
         geshi_dbg('Checking: ' . substr($code, 0, 15));
-        foreach ($this->_charsToEscape as $match) {
+        foreach ($chars_to_escape as $match) {
             if ('REGEX' != substr($match, 0, 5)) {
                 geshi_dbg('Test: ' . $match);
                 if (substr($code, 0, 1) == $match) {
                     return 1;
                 }
-            } else {
+            }
+            else {
                 geshi_dbg('  Testing via regex: ' . $match . '... ', false);
                 $data = geshi_get_position($code, $match, 0);
                 if (0 === $data['pos']) {
@@ -229,6 +364,9 @@
         // No matches...
         return false;
     }
+    
+    // }}}
+    
 }
 
 ?>

Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php
===================================================================
--- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php	2008-05-18 15:24:09 UTC (rev 1166)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/functions.geshi.php	2008-05-18 15:33:07 UTC (rev 1167)
@@ -6,10 +6,10 @@
  *   Author: Nigel McNie
  *   E-mail: ni...@ge...
  * </pre>
- * 
+ *
  * For information on how to use GeSHi, please consult the documentation
  * found in the docs/ directory, or online at http://geshi.org/docs/
- * 
+ *
  * This program is part of GeSHi.
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -21,7 +21,7 @@
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
- * 
+ *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
@@ -32,7 +32,7 @@
  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL
  * @copyright  (C) 2004 - 2006 Nigel McNie
  * @version    $Id$
- * 
+ *
  */
 
 $GLOBALS['geshi_dbg'] = false;
@@ -52,7 +52,7 @@
 /**
  * Handles debugging by printing a message according to current debug level,
  * mask of context and other things.
- * 
+ *
  * @param string The message to print out
  * @param int The context in which this message is to be printed out in - see
  *            the GESHI_DBG_* constants
@@ -78,15 +78,15 @@
             case '@b':
                 $start = '<span style="font-weight:bold;">';
                 break;
-            
+
             case '@i':
                 $start = '<span style="font-style:italic;">';
                 break;
-                
+
             case '@o':
                 $start = '<span style="color:green;background-color:#efe;border:1px solid #393;">';
                 break;
-            
+
             case '@w':
                 $start = '<span style="color:#660;background-color:#ffe;border:1px solid #993;">';
                 break;
@@ -94,22 +94,22 @@
             case '@e':
                 $start = '<span style="color:red;background-color:#fee;border:1px solid #933;">';
                 break;
-                
+
             default:
                 $end = '';
         }
-        
+
         if (preg_match('#(.*?)::(.*?)\((.*?)\)#si', $message)) {
             $start = '<span style="font-weight:bold;">';
             $end   = '</span>';
         }
-        
+
         if (preg_match('#^@[a-z]#', $message)) {
             $message = substr($message, 2);
         }
         echo $start . htmlspecialchars(str_replace("\n", '', $message)) . $end;
         if ($add_nl) echo "\n";
-    } 
+    }
 }
 
 /**
@@ -157,56 +157,148 @@
 }
 
 /**
- * Drop-in replacement for strpos and stripos. Also can handle regular expression
+ * A replacement for strpos and stripos that can also handle regular expression
  * string positions.
- * 
+ *
  * @param string The string in which to search for the $needle
- * @param string The string to search for. If this string starts with "REGEX" then
- *               a regular expression search is performed.
- * @param int    The offset in the string in which to start searching
+ * @param string The string to search for. If this string starts with "REGEX"
+ *               then a regular expression search is performed.
+ * @param int    The offset in the string in which to start searching.  Look-
+ *               behind assertions in a regex that refer to characters prior to
+ *               this point will not match.
  * @param boolean Whether the search is case sensitive or not
- * @param boolean Whether the match table is needed (almost never, and it makes things slower)
+ * @param boolean Whether the match table is needed (almost never, and it makes
+ *                things slower, but probably not noticeably).
  * @return array An array of data:
  * <pre> 'pos' => position in string of needle,
  * 'len' => length of match
- * 'tab' => a table of the stuff matched in brackets for a regular expression</pre>
+ * 'tab' => a tabular array containing the parenthesised sub-matches of a
+ *   regular expression.  [0] is the complete match, [1] the first parenthesised
+ *   sub-match, and so on.
+ * </pre>
  * @access private
  */
-function geshi_get_position ($haystack, $needle, $offset = 0, $case_sensitive = false, $need_table = false)
+function geshi_get_position ($haystack, $needle, $offset = 0,
+  $case_sensitive = false, $need_table = false)
 {
     if ('REGEX' != substr($needle, 0, 5)) {
         if (!$case_sensitive) {
-            return array('pos' => stripos($haystack, $needle, $offset), 'len' => strlen($needle));
+            // @todo [blocking 1.1.4] This line is marked by BenBE as
+            // one of the slowest. If you don't have PHP5 then this is
+            // done manually; the function for it should probably be
+            // cleaned up a bit.
+            return array('pos' => stripos($haystack, $needle, $offset),
+              'len' => strlen($needle));
         } else {
-            return array('pos' => strpos($haystack, $needle, $offset), 'len' => strlen($needle));
+            return array('pos' => strpos($haystack, $needle, $offset),
+              'len' => strlen($needle));
         }
     }
-    
+
     $regex = substr($needle, 5);
+    $haystack_offset = substr($haystack, $offset);
+    $table = array();
+    $length = 0;
+    $flags = PREG_SPLIT_OFFSET_CAPTURE;
+    if ($need_table) $flags |= PREG_SPLIT_DELIM_CAPTURE;
+    // @todo [blocking 1.1.4]  This line is marked by BenBE as one of the
+    // slowest. There's not too much that can be done to speed up the line
+    // per se, but possibly something similar to the "here's a character
+    // you can check to see if this is ever going to pass" might be useful.
+    $splits = preg_split($regex, $haystack_offset, 2, $flags);
+    if (count($splits) > 1) {
+        $first = array_shift($splits);
+        $last = array_pop($splits);
+        $pos = strlen($first[0]);
+        $length = $last[1] - $pos;
+        $pos += $offset;
+        if ($need_table) {
+            $table[] = substr($haystack_offset, $pos, $length);
+            foreach ($splits as $match) $table[] = $match[0];
+        }
+    } else $pos = false;
+    return array('pos' => $pos, 'len' => $length, 'tab' => $table);
+}
 
-    // Get the location of the first match of the regular expression    
-    $foo = microtime();
-    $foo_len = strlen($foo);
-    $len = strlen($haystack);
-    $str = preg_replace($regex, $foo, $haystack, 1);
-    $length = $len - (strlen($str) - $foo_len);
+/**
+ * Which, if any, of the strings in the array $substrs occurs at offset $offset
+ * in the string $str?
+ * If $flags contains GESHI_WHICHSS_MAXIMAL, then the largest of multiple
+ * matches will be returned, otherwise and by default: the first encountered.
+ * If $flags contains GESHI_WHICHSS_CASEINSENSITIVE then the comparison will be
+ * case-insensitive; otherwise and by default it will be case-sensitive.
+ * If $flags contains GESHI_WHICHSS_TRYREGEX then the remaining portion of any
+ * string in $substrs that starts with 'REGEX' will be treated as a (Perl-
+ * compatible) regular expression to match, anchored to the start of the string
+ * at $offset.  Look-behind assertions that refer to parts of the string prior
+ * to $offset will not work.  If $flags contains GESHI_WHICHSS_SKIPANCHORINSERT
+ * then the anchor insertion on each regex in $substrs will not be performed -
+ * it will be assumed to have already been performed but in any case only
+ * matches at the start of the string will ever be returned.
+ * @return Null if no match is found, otherwise the matching substring, with
+ * case as in the $substrs element rather than the matching portion of $str.
+ */
+define('GESHI_WHICHSS_MAXIMAL', 1);
+define('GESHI_WHICHSS_CASEINSENSITIVE', 2);
+define('GESHI_WHICHSS_TRYREGEX', 4);
+define('GESHI_WHICHSS_SKIPANCHORINSERT', 8);
+function geshi_whichsubstr($str, $substrs, $offset = 0, $flags = 0) {
+    /* Constants */
+    static $re_starter_c = 'REGEX';
+    static $re_starter_len_c = 5/*strlen($re_starter_c)*/;
 
-    // Return match table if requested 
-    if ($need_table) {
-        $matches = array();
-        preg_match_all($regex, $haystack, $matches);
-        $i = 0;
-        $table = array();
-        foreach ( $matches as $match ) {
-            $table[$i++] = (isset($match[0])) ? $match[0] : null;
+    $ret = null;
+    $max_len = -1;
+    foreach ($substrs as $substr) {
+        if (($flags & GESHI_WHICHSS_TRYREGEX) &&
+          strncmp($substr,$re_starter_c,$re_starter_len_c)==0) {
+            $re = substr($substr, $re_starter_len_c);
+            if (!($flags & GESHI_WHICHSS_SKIPANCHORINSERT)) {
+                $re = geshi_anchor_re($re);
+            }
+            $haystack = $offset > 0 ? substr($str, $offset) : $str;
+            $match = preg_match($re, $haystack, $matches, PREG_OFFSET_CAPTURE) ?
+              $matches[0][0] : null;
+            $len = strlen($match);
+            /* The match is discarded below only if
+             * GESHI_WHICHSS_SKIPANCHORINSERT was specified without a
+             * pre-existing anchor and the match started beyond $offset.
+             */
+            if ($match !== null && $matches[0][1]) $len = $match = null;
+        } else {
+            $len = strlen($substr);
+            if (!($flags & GESHI_WHICHSS_CASEINSENSITIVE)) {
+                $match = substr($str,$offset,$len) == $substr ? $substr : null;
+            } else if (strcasecmp(substr($str, $offset, $len), $substr) == 0) {
+                $match = $substr;
+            } else $match = null;
         }
-    } else {
-        $table = array();
+        if ($match !== null) {
+            if (!($flags & GESHI_WHICHSS_MAXIMAL)) {
+                $ret = $match;
+                break;
+            } else if ($len > $max_len) {
+                $ret = $match;
+                $max_len = $len;
+            }
+        }
     }
-    return array('pos' => strpos($str, $foo), 'len' => $length, 'tab' => $table);
+    return $ret;
 }
 
 /**
+ * Safely inserts an anchor into the regex $regex so that it only matches at the
+ * start of the searched string.
+ * @return string The regex with anchor inserted.
+ */
+function geshi_anchor_re($regex) {
+    $delim = $regex{0};
+    $endPos = strrpos($regex, $delim);
+    $endChars = substr($regex, $endPos);
+    return "$delim^(".substr($regex, 1, $endPos - 1).')'.$endChars;
+}
+
+/**
  * @todo [blocking 1.1.5] Octal/hexadecimal numbers are common, so should have functions
  *       for those, and make sure that integers/doubles do not collide
  * @access private
@@ -236,9 +328,9 @@
 //
 /**
  * Replace stripos()
- * 
+ *
  * This function lifted from the PHP_Compat PEAR package, and optimised
- * 
+ *
  * @author      Aidan Lister <ai...@ph...>, Nigel McNie <ni...@ge...>
  * @version     $Revision$
  * @access private
@@ -267,7 +359,7 @@
 
 /**
  * Returns the GeSHi_Styler object used to help with parsing
- * 
+ *
  * @param boolean $force_new If true, forces the creation of
  *                           a new GeSHi_Parser object
  * @return GeSHi_Styler

Copied: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES (from rev 1166, tags/RELEASE_1_1_2_ALPHA3/geshi-src/geshi/languages/c/NOTES)
===================================================================
--- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES	                        (rev 0)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/NOTES	2008-05-18 15:33:07 UTC (rev 1167)
@@ -0,0 +1,87 @@
+Here are several notes on C highlighting, originally contained as extended
+comments within c.php.  Mostly this relates to preprocessor-context
+highlighting and the situations in which the C parser function
+GeSHiCCodeParser::parseToken() adjusts it.
+
+== (Un)Highlighted keywords in the preprocessor context ==
+
+It might seem questionable at first whether declarator/type/qualifier keywords,
+standard functions and standard macros or objects will ever occur, thus
+requiring highlighting, within some preprocessor directives - namely #(el)if,
+#ifdef, #ifndef and #undef.  They can and do occur in practice though because
+these directives can be used to test whether at preprocessor level the keyword,
+type or function in question has been subverted (or for a function, whether
+it's been legitimately defined as a macro), and/or to undo or change that
+subversion; for #if/#elif, sizeof should be highlighted in any case - it's been
+categorised as a standard function for GeSHi's purposes.
+
+For #(el)if, a type might also appear as the subject of sizeof.
+
+It's also debatable whether such tokens should be highlighted within #error and
+#pragma directives - it seems most appropriate that they are not, because within
+those directives their occurrence can be likened to their appearance within a
+comment.  GeSHiCCodeParser::parseToken() therefore adjusts those contexts;
+their highlighting when the parser is disabled is tolerable as a minor glitch.
+
+It's less debatable that within a #include filename, these keywords should not
+be highlighted.  That's handled in GeSHiCCodeParser::parseToken() for <>
+includes - quoted includes are already protected by the string_literal context
+(which parseToken() reclassifies).  It's borderline tolerable that this
+incorrect highlighting will appear when the parser is disabled.
+
+Within a #include where the filename is specified by a macro, the only keywords
+that should be highlighted out of the list at the top of this section are:
+standard macros (because they might be used in a stringising macro "call"), any
+standard functions that are implementable as macros (for the same reason),
+"sizeof" (because it might be used to generate an argument for a macro "call")
+and types (but not qualifiers) where they appear as the subject of sizeof.  The
+remainder have no meaning in preprocessor macro-"call" context.  Separating out
+"implementable-as-a-macro" from the other standard functions is a longer-term
+future task to complete alongside comprehensively filling out what's missing
+from the keyword lists.  Separating qualifiers from types is another task to
+consider.  To start with, GeSHiCCodeParser::parseToken() disables highlighting
+for the context 'declarator-keyword' within #include directives where the filename is
+specified by a macro, and /all/ highlighting is disabled for the macro name
+itself - i.e. highlighting applies only to macro arguments.
+
+The same reasoning of the above paragraph can be applied to the #line directive
+where its "arguments" are specified by a macro: GeSHiCCodeParser::parseToken()
+similarly disables highlighting in that situation.
+
+== Symbols in C preprocessor directives ==
+
+Not all of the symbols added by the call:
+    $context->addSymbolGroup(geshi_c_get_standard_symbols(),
+      'c/c/preprocessor/symbol');
+have meaning for all preprocessor directives and in some directives they are
+illegal.  This GeSHi C module assumes well-formed input code so illegal
+occurrences need not concern it.
+
+In #(el)if directives, any symbol except the semicolon can legally occur.
+At first it might seem that & has no place either because at preprocessing stage
+no objects exist to take an address of, but & can also act as a bitwise
+operator or be part of the logical && operator.  Due to the lack of objects it
+might also at first seem that [] has no use, however it can be applied to
+string literals for esoteric uses in a preprocessor constant such as this
+expression equating to 1:
+"abcd"[1] == 'b'
+A semicolon though is only used to end single statements in code - this can't
+apply to a constant preprocessor expression.
+
+In #include and #line directives, the header filename and new effective source
+file name (respectively) may be specified by a macro.  A macro may take a
+constant preprocessor expression as an argument, so by this reasoning it can be
+seen that within #include and #line directives the same set of symbols can
+occur as within an #(el)if directive - namely, anything except a semicolon.
+
+In a #define, even a semicolon can occur because the macro can substitute for
+code.
+
+#ifdef, #ifndef, #undef, #endif and #else do not allow any symbol except as
+part of comments and line-continuation backslashes.
+
+Likewise for #error and #pragma except that any symbol could occur as part of
+the subsequent (unquoted) freeform text.  These should not be highlighted, and
+thus GeSHiCCodeParser::parseToken() recontextualises them so that they aren't
+highlighted.  Their highlighting when the parser is disabled is tolerable as a
+minor glitch.

Modified: branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php
===================================================================
--- branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php	2008-05-18 15:24:09 UTC (rev 1166)
+++ branches/RELEASE_1_1_X_DEVEL/geshi-src/geshi/languages/c/c.php	2008-05-18 15:33:07 UTC (rev 1167)
@@ -54,33 +54,34 @@
     $context->addChild('multi_comment');
     $context->addChild('single_comment');
     $context->addChild('string_literal', 'string');
+    $context->addChild('widestring_literal', 'string');
     $context->addChild('character_constant', 'singlechar');
+    $context->addChild('widecharacter_constant', 'singlechar');
     $context->addChild('preprocessor', 'code');
-    
+
     $context->addKeywordGroup(geshi_c_get_ctlflow_keywords(),
       'ctlflow-keyword', true, geshi_c_get_ctlflow_keywords_url());
 
     $context->addKeywordGroup(geshi_c_get_declarator_keywords(),
       'declarator-keyword', true, geshi_c_get_declarator_keywords_url());
-    
+
     $context->addKeywordGroup(geshi_c_get_types_and_qualifiers(),
       'typeorqualifier', true, geshi_c_get_types_and_qualifiers_url());
-    
+
     $context->addKeywordGroup(geshi_c_get_standard_functions(),
       'stdfunction', true, geshi_c_get_standard_functions_url());
-    
+
     $context->addKeywordGroup(geshi_c_get_standard_macros_and_objects(),
       'stdmacroorobject', true, geshi_c_get_standard_macros_and_objects_url());
-    
+
     $context->addSymbolGroup(geshi_c_get_standard_symbols(), 'symbol');
-    
+
     $context->useStandardIntegers();
     $context->useStandardDoubles(array('chars_after_number' => array('f','l')));
-    
+
     $context->addObjectSplitter(geshi_c_get_structure_access_symbols(),
       'member', 'symbol');
     $context->setComplexFlag(GESHI_COMPLEX_TOKENISE);
-    
 }
 
 function geshi_c_c_multi_comment (&$context)
@@ -91,45 +92,60 @@
 
 function geshi_c_c_single_comment (&$context)
 {
-    $context->addDelimiters('//', 'REGEX#(?<!\\\)\n#');
+    $context->addDelimiters('//', "\n");
     $context->setComplexFlag(GESHI_COMPLEX_PASSALL);
-    // without this, detection of a following preprocessor directive is
-    // inhibited [due to changes this might no longer apply]
+    /* Without this, and when the comment occurs at the end of a preprocessor
+     * directive, any immediately subsequent preprocessor directive is treated
+     * as a continuation of the first one. */
     $context->parseDelimiters(GESHI_CHILD_PARSE_LEFT);
 }
 
+/* A (wide)string literal may be continued to the next line through the use of a
+ * trailing \ but otherwise multiline strings are illegal.  This code doesn't
+ * attempt to mark erroneous multiline strings, and slash-continuation is
+ * handled generically in GeSHiCCodeParser::parseToken().  This code does
+ * terminate strings on newlines though due to the legality of the appearance
+ * of unmatched double quote marks in #error and #pragma directives.
+ * GeSHiCCodeParser::parseToken() later unhighlights such unterminated strings
+ * but they can't be allowed to continue over the line otherwise the
+ * #error/#pragma directive will be incorrectly continued over multiple lines
+ * prior to GeSHiCCodeParser receiving it.
+ */
 function geshi_c_c_string_literal (&$context)
 {
-    /*
-     * A string literal may be continued to the next line with a trailing \ but
-     * otherwise multiline strings are illegal; we don't attempt to mark that
-     * error here though.
-     */
-    $context->addDelimiters('"', '"');
-
-    $context->setEscapeCharacters('\\');
-    /** @todo string literals and character constants may be immediately
-      * preceded by a capital L to indicate a wide-character constant and it
-      * would be nice to include that in the highlighting.
-      */
-    $context->setCharactersToEscape(array("'", '?', 'a', 'b', 'f',
-        'v', 'n', 'r', 't', 'REGEX#[0-7]{1,3}#',
-        'REGEX#x[0-9a-f]{1,}#i', '\\', '"'));
+    geshi_c_base_string($context, '"', array('"', 'REGEX#(?=\n)#'), false);
 }
-
+function geshi_c_c_widestring_literal (&$context)
+{
+    geshi_c_base_string($context, 'L"', array('"', 'REGEX#(?=\n)#'), true);
+}
 function geshi_c_c_character_constant (&$context)
 {
-    $context->addDelimiters("'", "'");
+    geshi_c_base_singlechar($context, "'", "'", false);
+    $context->setDisallowEmptyChars();
+}
+function geshi_c_c_widecharacter_constant (&$context)
+{
+    geshi_c_base_singlechar($context, "L'", "'", true);
+    $context->setDisallowEmptyChars();
+}
 
-    $context->setEscapeCharacters('\\');
+function geshi_c_base_string (&$context, $delim_start, $delim_end, $delim_cs) {
+    $context->addDelimiters($delim_start, $delim_end, $delim_cs);
+    $context->addEscapeGroup('\\', array("'", '"', '?', '\\', 'a', 'b', 'f',
+        'n', 'r', 't', 'v', 'REGEX#([0-7]{1,3}|x[0-9a-f]{1,})#i'));
+    $context->setComplexFlag(GESHI_COMPLEX_PASSALL);
+}
 
-    /** @todo same todo as for geshi_c_c_string_literal(). */
-    $context->setCharactersToEscape(array("'", '?', 'a', 'b', 'f',
-        'v', 'n', 'r', 't', 'REGEX#[0-7]{1,3}#',
-        'REGEX#x[0-9a-f]{1,}#i', '\\', '"'));
+function geshi_c_base_singlechar (&$context, $delim_start, $delim_end, $delim_cs) {
+    $context->addDelimiters($delim_start, $delim_end, $delim_cs);
+    $context->setEscapeCharacters('\\');
+    $context->setCharactersToEscape(array("'", '"', '?', '\\', 'a', 'b', 'f',
+        'n', 'r', 't', 'v', 'REGEX#([0-7]{1,3}|x[0-9a-f]{1,})#i'));
+    $context->setComplexFlag(GESHI_COMPLEX_PASSALL);
 }
 
-/**
+/*
  * Duplicate these functions for the preprocessor simply so that they can have
  * a different highlighting context.
  */
@@ -143,127 +159,60 @@
 }
 function geshi_c_c_preprocessor_string_literal (&$context)
 {
-    geshi_c_c_string_literal($context);
+    geshi_c_c_string_literal ($context);
 }
+function geshi_c_c_preprocessor_widestring_literal (&$context)
+{
+    geshi_c_c_widestring_literal ($context);
+}
 function geshi_c_c_preprocessor_character_constant (&$context)
 {
     geshi_c_c_character_constant ($context);
 }
+function geshi_c_c_preprocessor_widecharacter_constant (&$context)
+{
+    geshi_c_c_widecharacter_constant ($context);
+}
 
 function geshi_c_c_preprocessor (&$context)
 {
-    /**
-     * A preprocessing directive beginning with a # must occur at the start
+    /* A preprocessing directive beginning with a # must occur at the start
      * of a line, but may optionally be preceded by whitespace.  The hash may
-     * optionally be followed by whitespace in the same manner, after which
-     * the actual directive keyword is specified.  Finally though, a hash
-     * without a following directive is allowed as a 'null directive'.
+     * optionally be followed by whitespace, after which the actual directive
+     * keyword is specified.  Finally though, a hash without a following
+     * directive is allowed as a 'null directive'.
      *
      * There is also a single preprocessing directive (_Pragma) that follows
-     * the same rules but is not preceded by a hash
+     * the same rules but that is not preceded by a hash
      *
      * The list of non-newline whitespace characters recognised by C and
-     * used in the r.e. below is: [ \t\f\v]
-  ...
 
[truncated message content]